# CIFAR10 with CNN
code from https://github.com/fchollet/keras/blob/master/examples/mnist_cnn.py


This notebook trains a simple convolutional neural network on the CIFAR10 small images dataset. 



In [1]:
# import libraries
from __future__ import print_function
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D

#for confusion matrix
import numpy as np
import sklearn
from sklearn.metrics import confusion_matrix


Using TensorFlow backend.


Prepare data

In [2]:
# define constants
batch_size = 128
num_classes = 10
epochs = 20


# The data, shuffled and split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Cast features into correct data type then scale features
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


Define model

In [3]:
model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))


Show model structure

In [4]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 30, 30, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 30, 30, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 15, 15, 64)        18496     
__________

Compile model and fit


In [5]:
# initiate RMSprop optimizer
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
                metrics=['accuracy'])


model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_test, y_test),
            shuffle=True)




Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20

Epoch 3/20
Epoch 4/20

Epoch 5/20
Epoch 6/20

Epoch 7/20
Epoch 8/20

Epoch 9/20
Epoch 10/20

Epoch 11/20
Epoch 12/20

Epoch 13/20
Epoch 14/20

Epoch 15/20
Epoch 16/20

Epoch 17/20
Epoch 18/20

Epoch 19/20
Epoch 20/20



<keras.callbacks.History at 0x7f262e4dcbe0>

In [6]:
# Save the original model
model.save('cifra10_base.h5')

In [7]:
# Print base model loss and accuracy
print('\n  - Base case:')
model = keras.models.load_model("cifra10_base.h5")
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])
Y_pred = model.predict(x_test, verbose=2)
cm = confusion_matrix(np.argmax(y_test,axis=1),np.argmax(Y_pred, axis=1))
print('\tConfusion Matrix:\t')
print(cm)


  - Base case:
	Test loss:	 0.8952444440841675
	Test accuracy:	 0.6874
	Confusion Matrix:	
[[711  14  61  24  20  15   9  11 111  24]
 [ 24 806  16  11   6  10  14  10  46  57]
 [ 60   2 560  84 128  71  45  27  18   5]
 [ 19   7  79 527  95 164  49  31  20   9]
 [ 24   3  79  60 676  38  41  63  15   1]
 [ 11   2  62 185  64 603  20  46   6   1]
 [  5   3  56  86  65  22 748   9   6   0]
 [ 13   0  38  53  86  78   6 718   5   3]
 [ 56  28  21  19   8   9   6   4 838  11]
 [ 34 111  10  30  10  12  14  30  62 687]]


### 1) Augment the data by adding noise.

In [8]:
# Save models with gaussian noise with different Standard Deviation
for e in [0.5,0.1,0.05,0.02,0.01]:  
  model = Sequential()
  model.add(Conv2D(32, (3, 3), padding='same',input_shape=x_train.shape[1:]))
  model.add(Activation('relu'))
  model.add(Conv2D(32, (3, 3)))
  model.add(Activation('relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Dropout(0.25))
  
  #Add noise
  model.add(keras.layers.GaussianNoise(e))

  model.add(Conv2D(64, (3, 3), padding='same'))
  model.add(Activation('relu'))
  model.add(Conv2D(64, (3, 3)))
  model.add(Activation('relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Dropout(0.25))

  model.add(Flatten())
  model.add(Dense(512))
  model.add(Activation('relu'))
  model.add(Dropout(0.5))
  model.add(Dense(num_classes))
  model.add(Activation('softmax'))
  
  model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
  model.fit(x_train, y_train,
                batch_size=batch_size,
                epochs=epochs,
                validation_data=(x_test, y_test),
                shuffle=True)
  model.save('cifra10_n'+str(e)+'.h5')

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 9344/50000 [====>.........................] - ETA: 9s - loss: 1.9427 - acc: 0.2996

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20
Train on 50000 samples, validate on 10000 samples
Epoch 1/20

Epoch 2/20
Epoch 3/20

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20
Train on 50000 samples, validate on 10000 samples
Epoch 1/20

Epoch 2/20
Epoch 3/20

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20
Train on 50000 samples, validate on 10000 samples
Epoch 1/20

Epoch 2/20
Epoch 3/20

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20
Train on 50000 samples, validate on 10000 samples
Epoch 1/20

Epoch 2/20
Epoch 3/20

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20


In [9]:
# Load models and print results
for e in [0.5,0.1,0.05,0.02,0.01]:
    model = keras.models.load_model('cifra10_n'+str(e)+'.h5')
    print('\n  - Gaussian Noise with '+str(e)+' stdev:')
    scores = model.evaluate(x_test, y_test, verbose=0)
    print('\tTest loss:\t', scores[0])
    print('\tTest accuracy:\t', scores[1])


  - Gaussian Noise with 0.5 stdev:
	Test loss:	 1.3346526174545288
	Test accuracy:	 0.523

  - Gaussian Noise with 0.1 stdev:
	Test loss:	 1.001115407371521
	Test accuracy:	 0.6474

  - Gaussian Noise with 0.05 stdev:
	Test loss:	 0.9230113963127137
	Test accuracy:	 0.6798

  - Gaussian Noise with 0.02 stdev:
	Test loss:	 0.9106454853057862
	Test accuracy:	 0.6845

  - Gaussian Noise with 0.01 stdev:
	Test loss:	 0.9092573655128479
	Test accuracy:	 0.6836


### 2) Add layers to the network to improve performance

#### First model: Add more layers and Adam optimizer

In [10]:
model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
#add another dense layer
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False),
                metrics=['accuracy'])
model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_test, y_test),
            shuffle=True)

model.save('cifra10_moreLayersAdam.h5')

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 6016/50000 [==>...........................] - ETA: 11s - loss: 1.0340 - acc: 0.6268

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20


In [11]:
model= keras.models.load_model('cifra10_moreLayersAdam.h5')
print('\n  - More layers and Atom optimizer:')
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])

Y_pred = model.predict(x_test, verbose=2)
cm = confusion_matrix(np.argmax(y_test,axis=1),np.argmax(Y_pred, axis=1))
print('\tConfusion Matrix:\t')
print(cm)


  - More layers and Atom optimizer:
	Test loss:	 0.6483449568748474
	Test accuracy:	 0.7837
	Confusion Matrix:	
[[823  19  23   9   4   4  10   6  68  34]
 [  6 930   0   2   1   3   2   1  14  41]
 [ 78   5 638  42  54  65  81  11  18   8]
 [ 29  15  45 555  38 174  79  20  26  19]
 [ 22   4  64  43 713  27  71  42   7   7]
 [ 15   5  42 150  25 709  23  21   7   3]
 [ 10   5  19  38  13  17 884   4   4   6]
 [ 23   1  28  29  42  52   6 800   4  15]
 [ 41  25   0   5   3   2   4   2 904  14]
 [ 20  59   6   6   1   1   2   3  21 881]]


#### Second model: regularizer

In [12]:
from keras import regularizers

weight_decay = 1e-4

model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))

model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3), kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
                metrics=['accuracy'])
model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_test, y_test),
            shuffle=True)

model.save('cifra10_regL2.h5')

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 9088/50000 [====>.........................] - ETA: 10s - loss: 1.5991 - acc: 0.4243

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20


In [13]:
model= keras.models.load_model('cifra10_regL2.h5')
print('\n  - regularizer L2:')
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])

Y_pred = model.predict(x_test, verbose=2)
cm = confusion_matrix(np.argmax(y_test,axis=1),np.argmax(Y_pred, axis=1))
print('\tConfusion Matrix:\t')
print(cm)


  - regularizer L2:
	Test loss:	 0.9115801405906677
	Test accuracy:	 0.6901
	Confusion Matrix:	
[[757  27  30  11   8   4  10   5  80  68]
 [ 16 825   2   3   1   2   9   2  27 113]
 [ 89  11 512  46 102  76  61  49  25  29]
 [ 22  23  62 412  79 200  67  62  23  50]
 [ 28   8  71  46 592  37  62 122  19  15]
 [ 20   9  54 105  41 631  26  70  11  33]
 [ 12  10  36  37  40  29 776  17   7  36]
 [ 22   8  29  23  43  57   5 766   5  42]
 [ 63  45   9   7   5   4   4   3 813  47]
 [ 24  94   7   4   5   3   7  12  27 817]]


### 3) Try another method for dealing with overfitting

In [14]:
from keras.layers.normalization import BatchNormalization

model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
                metrics=['accuracy'])
model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_test, y_test),
            shuffle=True)

model.save('cifra10_batch.h5')


Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 8576/50000 [====>.........................] - ETA: 13s - loss: 1.4624 - acc: 0.4674

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20


In [15]:

model = keras.models.load_model('cifra10_batch.h5')
print('\n  - Model with Batch Normalization:')
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])

Y_pred = model.predict(x_test, verbose=2)
cm = confusion_matrix(np.argmax(y_test,axis=1),np.argmax(Y_pred, axis=1))
print('\tConfusion Matrix:\t')
print(cm)


  - Model with Batch Normalization:
	Test loss:	 0.7826421875
	Test accuracy:	 0.7337
	Confusion Matrix:	
[[718  12  41  26  38  15  10  35  47  58]
 [  8 764   3  13   6   5  11  11  16 163]
 [ 46   2 500  74 122 101  69  69   5  12]
 [  6   5  24 565  66 206  53  54   6  15]
 [ 11   1  25  60 735  30  30 101   5   2]
 [  6   1  13 141  47 705  13  68   1   5]
 [  3   1  15  79  45  25 819   9   2   2]
 [  3   0   6  27  40  57   3 859   0   5]
 [ 53  32  10  26  13   5   5  14 789  53]
 [ 13  32   2  11   7   4   5  31  12 883]]


In [16]:
from keras.layers.normalization import BatchNormalization
from keras import regularizers

weight_decay = 1e-4

model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3), kernel_regularizer=regularizers.l2(weight_decay)))

model.add(BatchNormalization())

model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
                metrics=['accuracy'])
model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_test, y_test),
            shuffle=True)

model.save('cifra10_batch_regL2.h5')

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 8960/50000 [====>.........................] - ETA: 13s - loss: 1.5171 - acc: 0.4443

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20


In [17]:
model.save('cifra10_batch_regL2.h5')
print('\n  - Model with batch normalization and regularizer:')
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])


  - Model with batch normalization and regularizer:
	Test loss:	 0.794379836654663
	Test accuracy:	 0.7245
