# CIFAR10 with CNN
code from https://github.com/fchollet/keras/blob/master/examples/mnist_cnn.py


This notebook trains a simple convolutional neural network on the CIFAR10 small images dataset. 



In [1]:
# import libraries
from __future__ import print_function
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D

#for confusion matrix
import numpy as np
import sklearn
from sklearn.metrics import confusion_matrix


Using TensorFlow backend.


Prepare data

In [2]:
# define constants
batch_size = 128
num_classes = 10
epochs = 20


# The data, shuffled and split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Cast features into correct data type then scale features
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


Define model

In [3]:
model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))


Show model structure

In [4]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 30, 30, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 30, 30, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 15, 15, 64)        18496     
__________

Compile model and fit


In [5]:
# initiate RMSprop optimizer
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
                metrics=['accuracy'])


model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_test, y_test),
            shuffle=True)




Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20

Epoch 3/20
Epoch 4/20

Epoch 5/20
Epoch 6/20

Epoch 7/20
Epoch 8/20

Epoch 9/20
Epoch 10/20

Epoch 11/20
Epoch 12/20

Epoch 13/20
Epoch 14/20

Epoch 15/20
Epoch 16/20

Epoch 17/20
Epoch 18/20

Epoch 19/20
Epoch 20/20



<keras.callbacks.History at 0x7f8be6958f98>

In [6]:
# Save the original model
model.save('cifra10_base.h5')

In [7]:
# Print base model loss and accuracy
print('\n  - Base case:')
model = keras.models.load_model("cifra10_base.h5")
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])
Y_pred = model.predict(x_test, verbose=2)
cm = confusion_matrix(np.argmax(y_test,axis=1),np.argmax(Y_pred, axis=1))
print('\tConfusion Matrix:\t')
print(cm)


  - Base case:
	Test loss:	 0.8781241746902466
	Test accuracy:	 0.6973
	Confusion Matrix:	
[[733  29  39  18  24   6  10  12  56  73]
 [  4 841   2   4   4   1   8   4  10 122]
 [ 72   8 470  61 165  65  69  55  14  21]
 [ 19  13  51 468 121 147  68  60  14  39]
 [ 21   5  29  44 723  14  47  97  14   6]
 [  9  11  35 179  77 564  27  77   6  15]
 [  8   4  25  54  71  17 784  13   5  19]
 [ 10   5  18  23  85  46   5 781   2  25]
 [ 65  66  11  13   6   5   5   5 769  55]
 [ 16  77   4  10   8   3   6  18  18 840]]


### 1) Augment the data by adding noise.

In [19]:
# Save models with gaussian noise with different Standard Deviation
for e in [0.5,0.1,0.05,0.02,0.01]:  
  model = Sequential()
  model.add(Conv2D(32, (3, 3), padding='same',input_shape=x_train.shape[1:]))
  model.add(Activation('relu'))
  model.add(Conv2D(32, (3, 3)))
  model.add(Activation('relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Dropout(0.25))
  
  #Add noise
  model.add(keras.layers.GaussianNoise(e))

  model.add(Conv2D(64, (3, 3), padding='same'))
  model.add(Activation('relu'))
  model.add(Conv2D(64, (3, 3)))
  model.add(Activation('relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Dropout(0.25))

  model.add(Flatten())
  model.add(Dense(512))
  model.add(Activation('relu'))
  model.add(Dropout(0.5))
  model.add(Dense(num_classes))
  model.add(Activation('softmax'))
  
  model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
  model.fit(x_train, y_train,
                batch_size=batch_size,
                epochs=epochs,
                validation_data=(x_test, y_test),
                shuffle=True)
  model.save('cifra10_n'+str(e)+'.h5')

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 8832/50000 [====>.........................] - ETA: 10s - loss: 1.9837 - acc: 0.2656

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20
Train on 50000 samples, validate on 10000 samples
Epoch 1/20

Epoch 2/20
Epoch 3/20

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20
Train on 50000 samples, validate on 10000 samples
Epoch 1/20

Epoch 2/20
Epoch 3/20

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20
Train on 50000 samples, validate on 10000 samples
Epoch 1/20

Epoch 2/20
Epoch 3/20

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20
Train on 50000 samples, validate on 10000 samples
Epoch 1/20

Epoch 2/20
Epoch 3/20

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20


In [20]:
# Load models and print results
for e in [0.5,0.1,0.05,0.02,0.01]:
    model = keras.models.load_model('cifra10_n'+str(e)+'.h5')
    print('\n  - Gaussian Noise with '+str(e)+' stdev:')
    scores = model.evaluate(x_test, y_test, verbose=0)
    print('\tTest loss:\t', scores[0])
    print('\tTest accuracy:\t', scores[1])


  - Gaussian Noise with 0.5 stdev:
	Test loss:	 1.3755655250549317
	Test accuracy:	 0.5045

  - Gaussian Noise with 0.1 stdev:
	Test loss:	 0.9894431276321412
	Test accuracy:	 0.6573

  - Gaussian Noise with 0.05 stdev:
	Test loss:	 0.9525003180503845
	Test accuracy:	 0.6709

  - Gaussian Noise with 0.02 stdev:
	Test loss:	 0.9068649646759033
	Test accuracy:	 0.6836

  - Gaussian Noise with 0.01 stdev:
	Test loss:	 0.9086172308921814
	Test accuracy:	 0.6785


### 2) Add layers to the network to improve performance

#### First model: batch normalization

In [9]:
from keras.layers.normalization import BatchNormalization

model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=opt,
                metrics=['accuracy'])
model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_test, y_test),
            shuffle=True)

model.save('cifra10_batch.h5')


Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 8576/50000 [====>.........................] - ETA: 13s - loss: 1.4671 - acc: 0.4692

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20


In [10]:

model = keras.models.load_model('cifra10_batch.h5')
print('\n  - Model with Batch Normalization:')
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])

Y_pred = model.predict(x_test, verbose=2)
cm = confusion_matrix(np.argmax(y_test,axis=1),np.argmax(Y_pred, axis=1))
print('\tConfusion Matrix:\t')
print(cm)


  - Model with Batch Normalization:
	Test loss:	 0.7756728023529053
	Test accuracy:	 0.7313
	Confusion Matrix:	
[[740  11  72  13  44   6  18  24  51  21]
 [ 20 831   8  15   6   5  24   4  25  62]
 [ 46   1 580  34 151  52 104  21   7   4]
 [  8   3  57 451 111 181 150  29   8   2]
 [  7   2  28  25 817  12  75  30   4   0]
 [  5   0  48 147  96 625  42  33   3   1]
 [  3   0  26  30  38   5 893   1   4   0]
 [  8   1  37  18 104  49  16 766   0   1]
 [ 54  25  16  13  13   5  10   2 847  15]
 [ 37  59  10  21  21  12  19  23  35 763]]


#### Second model: regularizer

In [11]:
from keras.layers.normalization import BatchNormalization
from keras import regularizers

weight_decay = 1e-4

model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))

model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3), kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=opt,
                metrics=['accuracy'])
model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_test, y_test),
            shuffle=True)

model.save('cifra10_regL2.h5')

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 9088/50000 [====>.........................] - ETA: 10s - loss: 1.5727 - acc: 0.4316

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20


In [12]:
model= keras.models.load_model('cifra10_regL2.h5')
print('\n  - regularizer L2:')
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])

Y_pred = model.predict(x_test, verbose=2)
cm = confusion_matrix(np.argmax(y_test,axis=1),np.argmax(Y_pred, axis=1))
print('\tConfusion Matrix:\t')
print(cm)


  - regularizer L2:
	Test loss:	 0.8838677503585816
	Test accuracy:	 0.7
	Confusion Matrix:	
[[714  26  41  25  17   5  15   9 103  45]
 [ 15 858   5  12   4   3  15   3  34  51]
 [ 65   8 535  72 112  67  90  25  14  12]
 [ 15   9  63 525  85 134 113  21  22  13]
 [ 22   4  52  53 656  21 108  61  17   6]
 [  5   4  57 201  57 552  61  47   8   8]
 [  4   3  33  43  33  10 855   5   8   6]
 [ 13   5  28  47  83  60  14 731   4  15]
 [ 48  33  10  15   6   8   9   3 845  23]
 [ 24 123   6  20   8   4  26  15  45 729]]


### 3) Try another method for dealing with overfitting

In [13]:
from keras.layers.normalization import BatchNormalization
from keras import regularizers

weight_decay = 1e-4

model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3), kernel_regularizer=regularizers.l2(weight_decay)))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=opt,
                metrics=['accuracy'])
model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_test, y_test),
            shuffle=True)

model.save('cifra10_batch_regL2.h5')

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 8576/50000 [====>.........................] - ETA: 13s - loss: 1.4778 - acc: 0.4641

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20


In [14]:
model.save('cifra10_batch_regL2.h5')
print('\n  - Model with batch normalization and regularizer:')
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])


  - Model with batch normalization and regularizer:
	Test loss:	 0.8522795192718506
	Test accuracy:	 0.7121


In [15]:
model = keras.models.load_model('cifra10_batch.h5')
model.add(keras.layers.GaussianNoise(0.01))
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
                metrics=['accuracy'])
model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_test, y_test),
            shuffle=True)
model.save('cifra10_batch_n0.01.h5')

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 8576/50000 [====>.........................] - ETA: 13s - loss: 0.8019 - acc: 0.7367

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20


In [16]:
model = keras.models.load_model('cifra10_batch_n0.01.h5')

print('\n  - Model with Batch Normalization and Gaussian Noise with 0.01 stdev:')
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])


  - Model with Batch Normalization and Gaussian Noise with 0.01 stdev:
	Test loss:	 0.6446445227622986
	Test accuracy:	 0.7755
