# CIFAR10 with CNN
code from https://github.com/fchollet/keras/blob/master/examples/mnist_cnn.py


This notebook trains a simple convolutional neural network on the CIFAR10 small images dataset. 
Note: Although its seems like the epochs sometimes do not train until the end, this is only a visual issue. 



### Setup

In [10]:
# import libraries
from __future__ import print_function
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D

#for confusion matrix
import numpy as np
import sklearn
from sklearn.metrics import confusion_matrix


In [11]:
# define constants
batch_size = 128
num_classes = 10
epochs = 20


# The data, shuffled and split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Cast features into correct data type then scale features
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


### Base case

In [12]:
model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))


In [13]:
# initiate RMSprop optimizer
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
                metrics=['accuracy'])


model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_test, y_test),
            shuffle=True)

# Save the original model
model.save('cifra10_base.h5')



Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 9344/50000 [====>.........................] - ETA: 9s - loss: 1.5881 - acc: 0.4238 

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20


In [14]:
# Print base model loss and accuracy
print('\n  - Base case:')
model = keras.models.load_model("cifra10_base.h5")
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])
Y_pred = model.predict(x_test, verbose=2)
cm = confusion_matrix(np.argmax(y_test,axis=1),np.argmax(Y_pred, axis=1))
print('\tConfusion Matrix:\t')
print(cm)


  - Base case:
	Test loss:	 0.8805189178466797
	Test accuracy:	 0.6913
	Confusion Matrix:	
[[761  22  42  12   7   6  12  18  65  55]
 [ 17 829   5   4   1   2  12   8  15 107]
 [ 80  10 524  54  93  66  80  61  11  21]
 [ 20  15  72 413  70 200  89  72  18  31]
 [ 30   6  83  34 553  40  87 148  14   5]
 [  9   2  52 121  52 607  39  88  12  18]
 [  9   4  41  43  38  20 812  17   7   9]
 [ 13   6  25  18  39  53  10 817   3  16]
 [ 78  59  15   7   4   6   7   9 775  40]
 [ 34  69   4   7   6   4  11  23  20 822]]


### 1) Augment the data by adding noise.

In [15]:
# Save models with gaussian noise with different Standard Deviation
for e in [0.5,0.1,0.05,0.02,0.01]:  
  model = Sequential()
  model.add(Conv2D(32, (3, 3), padding='same',input_shape=x_train.shape[1:]))
  model.add(Activation('relu'))
  model.add(Conv2D(32, (3, 3)))
  model.add(Activation('relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Dropout(0.25))
  
  #Add noise
  model.add(keras.layers.GaussianNoise(e))

  model.add(Conv2D(64, (3, 3), padding='same'))
  model.add(Activation('relu'))
  model.add(Conv2D(64, (3, 3)))
  model.add(Activation('relu'))
  model.add(MaxPooling2D(pool_size=(2, 2)))
  model.add(Dropout(0.25))

  model.add(Flatten())
  model.add(Dense(512))
  model.add(Activation('relu'))
  model.add(Dropout(0.5))
  model.add(Dense(num_classes))
  model.add(Activation('softmax'))
  
  model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
  model.fit(x_train, y_train,
                batch_size=batch_size,
                epochs=epochs,
                validation_data=(x_test, y_test),
                shuffle=True)
  model.save('cifra10_n'+str(e)+'.h5')

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 8576/50000 [====>.........................] - ETA: 10s - loss: 1.9442 - acc: 0.2912

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20
Train on 50000 samples, validate on 10000 samples
Epoch 1/20

Epoch 2/20
Epoch 3/20

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20
Train on 50000 samples, validate on 10000 samples
Epoch 1/20

Epoch 2/20
Epoch 3/20

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20
Train on 50000 samples, validate on 10000 samples
Epoch 1/20

Epoch 2/20
Epoch 3/20

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20
Train on 50000 samples, validate on 10000 samples
Epoch 1/20

Epoch 2/20
Epoch 3/20

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20


In [16]:
# Load models and print results
for e in [0.5,0.1,0.05,0.02,0.01]:
    model = keras.models.load_model('cifra10_n'+str(e)+'.h5')
    print('\n  - Gaussian Noise with '+str(e)+' stdev:')
    scores = model.evaluate(x_test, y_test, verbose=0)
    print('\tTest loss:\t', scores[0])
    print('\tTest accuracy:\t', scores[1])


  - Gaussian Noise with 0.5 stdev:
	Test loss:	 1.4772556245803834
	Test accuracy:	 0.4864

  - Gaussian Noise with 0.1 stdev:
	Test loss:	 0.9550858486175537
	Test accuracy:	 0.6693

  - Gaussian Noise with 0.05 stdev:
	Test loss:	 0.9679321733474732
	Test accuracy:	 0.6651

  - Gaussian Noise with 0.02 stdev:
	Test loss:	 0.9283743391036987
	Test accuracy:	 0.6798

  - Gaussian Noise with 0.01 stdev:
	Test loss:	 0.8998800125122071
	Test accuracy:	 0.6857


### 2) Add layers to the network to improve performance

#### A) Add more layers and Adam optimizer

In [17]:
model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
#add another dense layer
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',optimizer=keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False),metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test), shuffle=True)

print('\n  - More layers and Atom optimizer:')
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])

Y_pred = model.predict(x_test, verbose=2)
cm = confusion_matrix(np.argmax(y_test,axis=1),np.argmax(Y_pred, axis=1))
print('\tConfusion Matrix:\t')
print(cm)

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 6016/50000 [==>...........................] - ETA: 11s - loss: 0.9922 - acc: 0.6444

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20

  - More layers and Atom optimizer:
	Test loss:	 0.6469792132854462
	Test accuracy:	 0.7883
	Confusion Matrix:	
[[832  15  43   9  14   1  10  12  42  22]
 [  8 911   4   0   1   2   9   2  13  50]
 [ 59   6 696  29  38  49  78  31   9   5]
 [ 19   6  83 513  46 160 110  33  19  11]
 [ 13   4  76  36 695  30 103  34   7   2]
 [ 10   2  55 101  26 723  38  36   2   7]
 [  6   0  27  17  11   8 921   4   5   1]
 [ 11   3  33  21  36  31  10 848   2   5]
 [ 51  17   5   8   5   3   7   5 878  21]
 [ 21  58   5  10   2   1   8  10  19 866]]


#### B) Use batch normalization

In [18]:
from keras.layers.normalization import BatchNormalization

model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',input_shape=x_train.shape[1:]))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
model.fit(x_train, y_train,batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test), shuffle=True)

print('\n  - Model with Batch Normalization:')
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])

Y_pred = model.predict(x_test, verbose=2)
cm = confusion_matrix(np.argmax(y_test,axis=1),np.argmax(Y_pred, axis=1))
print('\tConfusion Matrix:\t')
print(cm)

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 8576/50000 [====>.........................] - ETA: 13s - loss: 1.4962 - acc: 0.4556

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20

  - Model with Batch Normalization:
	Test loss:	 0.7986920112609863
	Test accuracy:	 0.7221
	Confusion Matrix:	
[[666   9  47  59  37  12  11  11  90  58]
 [ 10 769   7  22   9   6  23   5  44 105]
 [ 44   1 503 118 101 105  92  23   8   5]
 [  7   0  28 641  45 204  52  11   8   4]
 [  7   0  34 122 679  59  54  37   7   1]
 [  4   0  19 188  32 714  13  27   2   1]
 [  2   1  19  89  31  24 829   4   1   0]
 [  5   1  19  57  74 127  10 701   2   4]
 [ 23  13   5  35   7   8   7   0 881  21]
 [ 13  35   8  34   9   7  13  12  31 838]]


### 3) Try another method batch normalization to deal with overfitting 

#### Using Regularizer

In [19]:
from keras import regularizers

weight_decay = 1e-4
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:]))

model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3), kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test), shuffle=True)

print('\n  - regularizer L2:')
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])
Y_pred = model.predict(x_test, verbose=2)
cm = confusion_matrix(np.argmax(y_test,axis=1),np.argmax(Y_pred, axis=1))
print('\tConfusion Matrix:\t')
print(cm)

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 6528/50000 [==>...........................] - ETA: 10s - loss: 1.5956 - acc: 0.4225

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20

  - regularizer L2:
	Test loss:	 0.9137261812210083
	Test accuracy:	 0.6838
	Confusion Matrix:	
[[701  18  72  15  12  11  16   6 116  33]
 [ 17 807   9   5   5   6  19   3  50  79]
 [ 61   5 552  62  93  65 107  19  20  16]
 [ 12   7  90 455  71 179 131  14  21  20]
 [ 19   4  85  56 592  32 147  41  23   1]
 [ 15   1  84 170  44 577  69  27  10   3]
 [  5   4  37  36  13  13 881   2   6   3]
 [ 23   0  41  47  88  91  19 668   4  19]
 [ 55  29  13  13   5   3  12   2 842  26]
 [ 28  81  10  16   7   5  28  11  51 763]]


#### Base case with no dropouts

In [20]:
model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test),shuffle=True)

print('\n  - Base case without dropouts:')
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 9088/50000 [====>.........................] - ETA: 9s - loss: 1.4358 - acc: 0.4883

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20

  - Base case without dropouts:
	Test loss:	 0.9664685956954956
	Test accuracy:	 0.6751


#### Model with Regularizer but no dropout

In [21]:
from keras import regularizers

weight_decay = 1e-4
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:]))

model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3), kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test), shuffle=True)

print('\n  - regularizer L2 without dropout:')
scores = model.evaluate(x_test, y_test, verbose=0)
print('\tTest loss:\t', scores[0])
print('\tTest accuracy:\t', scores[1])

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
 9344/50000 [====>.........................] - ETA: 9s - loss: 1.4520 - acc: 0.4853

Epoch 4/20
Epoch 5/20

Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20

Epoch 10/20
Epoch 11/20

Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20

Epoch 16/20
Epoch 17/20

Epoch 18/20
Epoch 19/20

Epoch 20/20

  - regularizer L2 without dropout:
	Test loss:	 0.9376100969314575
	Test accuracy:	 0.6883
