## CIFAR10 CNN (without BN vs. with BN)

https://github.com/keras-team/keras/blob/master/examples/cifar10_cnn.py

In [1]:
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, ZeroPadding2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
import os

Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: GeForce GTX 950 (CNMeM is enabled with initial size: 90.0% of memory, cuDNN 5110)


In [2]:
# Training configuration shared by every experiment in this notebook.
batch_size = 32  # batch size passed to model.fit() and to the data generators
num_classes = 10  # number of classes for the one-hot labels and the softmax layer
epochs = 50  # number of training epochs for each model
data_augmentation = True  # True: train through ImageDataGenerator; False: plain model.fit()

### The data, shuffled and split between train and test sets

In [3]:
# Load CIFAR-10 as (images, labels) pairs for the train and test splits.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

In [4]:
# Report the training array's shape and the number of samples in each split.
print('x_train shape:', x_train.shape)
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

x_train shape: (50000, 3, 32, 32)
50000 train samples
10000 test samples


### Convert class vectors to binary class matrices

In [5]:
# Convert the class vectors of both splits to binary class matrices.
y_train, y_test = (
    keras.utils.np_utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

### Create model

In [6]:
# Start an empty Sequential model; its layers are added in the following cell.
model = Sequential()

In [7]:
# Baseline CNN (no batch normalization): a zero-padded 3x3 conv stage with 32
# filters, a stage of two 3x3 convs with 64 filters, each followed by max
# pooling and dropout, then a 512-unit dense layer and a softmax classifier.
cnn_layers = [
    ZeroPadding2D((1, 1), input_shape=x_train.shape[1:]),
    Convolution2D(32, 3, 3, activation='relu'),
    MaxPooling2D((2, 2), strides=(2, 2)),
    Dropout(0.25),
    ZeroPadding2D((1, 1)),
    Convolution2D(64, 3, 3, activation='relu'),
    Convolution2D(64, 3, 3, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
]
for layer in cnn_layers:
    model.add(layer)

In [8]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the baseline model.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
zeropadding2d_1 (ZeroPadding2D)  (None, 3, 34, 34)     0           zeropadding2d_input_1[0][0]      
____________________________________________________________________________________________________
convolution2d_1 (Convolution2D)  (None, 32, 32, 32)    896         zeropadding2d_1[0][0]            
____________________________________________________________________________________________________
maxpooling2d_1 (MaxPooling2D)    (None, 32, 16, 16)    0           convolution2d_1[0][0]            
____________________________________________________________________________________________________
dropout_1 (Dropout)              (None, 32, 16, 16)    0           maxpooling2d_1[0][0]             
___________________________________________________________________________________________

### Initiate RMSprop optimizer

In [9]:
# RMSprop optimizer with learning rate 1e-4 and decay 1e-6.
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

### Let's train the model using RMSprop

In [10]:
# Compile with categorical cross-entropy loss, the optimizer in `opt`, and accuracy as the reported metric.
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [11]:
# Cast both image arrays to float32.
x_train, x_test = (images.astype('float32') for images in (x_train, x_test))

In [12]:
# Scale pixel values by 1/255.  The division happens in place, so re-running
# this cell used to shrink the data a second time; guarding on the current
# maximum makes the cell idempotent (once scaled, the maximum is at most 1.0).
if x_train.max() > 1.0:
    x_train /= 255
if x_test.max() > 1.0:
    x_test /= 255

In [13]:
if data_augmentation:
    print('Using real-time data augmentation.')
    # Preprocessing and real-time augmentation applied to the training images.
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=False  # randomly flip images vertically
    )

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Training batches come from the augmenting generator; validation batches
    # come from a default generator over the test set, unshuffled.
    batches = datagen.flow(x_train, y_train, batch_size=batch_size)
    gen = ImageDataGenerator()
    val_batches = gen.flow(x_test, y_test, batch_size=batch_size, shuffle=False)

    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(
        batches,
        batches.n,
        nb_epoch=epochs,
        validation_data=val_batches,
        nb_val_samples=val_batches.n
    )
else:
    print('Not using data augmentation.')
    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        nb_epoch=epochs,
        validation_data=(x_test, y_test),
        shuffle=True
    )

Using real-time data augmentation.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### Score trained model

In [14]:
# Evaluate the trained baseline model on the test set; the next cell reads
# scores[0] as the loss and scores[1] as the accuracy.
scores = model.evaluate(x_test, y_test, verbose=1)



In [15]:
# `scores` comes from model.evaluate(): index 0 is reported as the loss, index 1 as the accuracy.
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

Test loss: 0.800688494301
Test accuracy: 0.7398


### Without BN: learning rate 0.001

In [16]:
# RMSprop again, with a 10x larger learning rate (1e-3) than the first run; decay stays at 1e-6.
opt = keras.optimizers.rmsprop(lr=0.001, decay=1e-6)

In [17]:
# Rebind `model` to a new, empty Sequential model for the lr=0.001 run.
model = Sequential()

In [18]:
# Same baseline architecture as the first run (no batch normalization):
# padded 3x3 conv (32 filters), two 3x3 convs (64 filters), pooling and
# dropout after each stage, then Dense(512) and a softmax classifier.
cnn_layers = [
    ZeroPadding2D((1, 1), input_shape=x_train.shape[1:]),
    Convolution2D(32, 3, 3, activation='relu'),
    MaxPooling2D((2, 2), strides=(2, 2)),
    Dropout(0.25),
    ZeroPadding2D((1, 1)),
    Convolution2D(64, 3, 3, activation='relu'),
    Convolution2D(64, 3, 3, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
]
for layer in cnn_layers:
    model.add(layer)

In [19]:
# Compile the rebuilt baseline model with the lr=0.001 optimizer currently bound to `opt`.
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [20]:
if data_augmentation:
    print('Using real-time data augmentation.')
    # Preprocessing and real-time augmentation applied to the training images.
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=False  # randomly flip images vertically
    )

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Training batches come from the augmenting generator; validation batches
    # come from a default generator over the test set, unshuffled.
    batches = datagen.flow(x_train, y_train, batch_size=batch_size)
    gen = ImageDataGenerator()
    val_batches = gen.flow(x_test, y_test, batch_size=batch_size, shuffle=False)

    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(
        batches,
        batches.n,
        nb_epoch=epochs,
        validation_data=val_batches,
        nb_val_samples=val_batches.n
    )
else:
    print('Not using data augmentation.')
    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        nb_epoch=epochs,
        validation_data=(x_test, y_test),
        shuffle=True
    )

Using real-time data augmentation.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### Batch normalization

In [21]:
# New RMSprop optimizer for the BN model, with the same settings as the first baseline run (lr 1e-4, decay 1e-6).
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

In [22]:
# Start an empty Sequential model for the batch-normalized variant.
bn_model = Sequential()

In [23]:
# CNN with batch normalization: the same layer stack as the baseline model,
# with a BatchNormalization layer after every ReLU convolution and after the
# 512-unit dense layer.  axis=1 is the channel axis here because the inputs
# are channels-first (x_train.shape[1:] is (3, 32, 32)).
# The stray trailing commas that followed three of the add() calls (they made
# those statements build and discard a one-element tuple) have been removed.
bn_model.add(ZeroPadding2D((1, 1), input_shape=x_train.shape[1:]))
bn_model.add(Convolution2D(32, 3, 3, activation='relu'))
bn_model.add(BatchNormalization(axis=1))
bn_model.add(MaxPooling2D((2, 2), strides=(2, 2)))
bn_model.add(Dropout(0.25))
bn_model.add(ZeroPadding2D((1, 1)))
bn_model.add(Convolution2D(64, 3, 3, activation='relu'))
bn_model.add(BatchNormalization(axis=1))
bn_model.add(Convolution2D(64, 3, 3, activation='relu'))
bn_model.add(BatchNormalization(axis=1))
bn_model.add(MaxPooling2D(pool_size=(2, 2)))
bn_model.add(Dropout(0.25))
bn_model.add(Flatten())
bn_model.add(Dense(512, activation='relu'))
# After Flatten/Dense the activations are 2-D, so the layer's default axis is used.
bn_model.add(BatchNormalization())
bn_model.add(Dropout(0.5))
bn_model.add(Dense(num_classes, activation='softmax'))

In [24]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the BN model.
bn_model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
zeropadding2d_5 (ZeroPadding2D)  (None, 3, 34, 34)     0           zeropadding2d_input_3[0][0]      
____________________________________________________________________________________________________
convolution2d_7 (Convolution2D)  (None, 32, 32, 32)    896         zeropadding2d_5[0][0]            
____________________________________________________________________________________________________
batchnormalization_1 (BatchNorma (None, 32, 32, 32)    128         convolution2d_7[0][0]            
____________________________________________________________________________________________________
maxpooling2d_5 (MaxPooling2D)    (None, 32, 16, 16)    0           batchnormalization_1[0][0]       
___________________________________________________________________________________________

In [25]:
# Compile the BN model with the same loss and metric as the baseline, using the optimizer currently bound to `opt`.
bn_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [26]:
if data_augmentation:
    print('Using real-time data augmentation.')
    # Preprocessing and real-time augmentation applied to the training images.
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=False  # randomly flip images vertically
    )

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Training batches come from the augmenting generator; validation batches
    # come from a default generator over the test set, unshuffled.
    batches = datagen.flow(x_train, y_train, batch_size=batch_size)
    gen = ImageDataGenerator()
    val_batches = gen.flow(x_test, y_test, batch_size=batch_size, shuffle=False)

    # Fit the BN model on the batches generated by datagen.flow().
    bn_model.fit_generator(
        batches,
        batches.n,
        nb_epoch=epochs,
        validation_data=val_batches,
        nb_val_samples=val_batches.n
    )
else:
    print('Not using data augmentation.')
    bn_model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        nb_epoch=epochs,
        validation_data=(x_test, y_test),
        shuffle=True
    )

Using real-time data augmentation.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [27]:
# Evaluate the trained BN model on the test set; the next cell reads
# scores[0] as the loss and scores[1] as the accuracy.
scores = bn_model.evaluate(x_test, y_test, verbose=1)



In [28]:
# `scores` comes from bn_model.evaluate(): index 0 is reported as the loss, index 1 as the accuracy.
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

Test loss: 0.712403135777
Test accuracy: 0.7671


### Batch normalization: learning rate 0.001

In [29]:
# RMSprop for the second BN run, with a 10x larger learning rate (1e-3); decay stays at 1e-6.
opt = keras.optimizers.rmsprop(lr=0.001, decay=1e-6)

In [30]:
# Rebind `bn_model` to a new, empty Sequential model for the lr=0.001 run.
bn_model = Sequential()

In [31]:
# Batch-normalized CNN for the lr=0.001 run: the baseline layer stack with a
# BatchNormalization layer after every ReLU convolution and after the 512-unit
# dense layer.  axis=1 is the channel axis here because the inputs are
# channels-first (x_train.shape[1:] is (3, 32, 32)).
# The stray trailing commas that followed three of the add() calls (they made
# those statements build and discard a one-element tuple) have been removed.
bn_model.add(ZeroPadding2D((1, 1), input_shape=x_train.shape[1:]))
bn_model.add(Convolution2D(32, 3, 3, activation='relu'))
bn_model.add(BatchNormalization(axis=1))
bn_model.add(MaxPooling2D((2, 2), strides=(2, 2)))
bn_model.add(Dropout(0.25))
bn_model.add(ZeroPadding2D((1, 1)))
bn_model.add(Convolution2D(64, 3, 3, activation='relu'))
bn_model.add(BatchNormalization(axis=1))
bn_model.add(Convolution2D(64, 3, 3, activation='relu'))
bn_model.add(BatchNormalization(axis=1))
bn_model.add(MaxPooling2D(pool_size=(2, 2)))
bn_model.add(Dropout(0.25))
bn_model.add(Flatten())
bn_model.add(Dense(512, activation='relu'))
# After Flatten/Dense the activations are 2-D, so the layer's default axis is used.
bn_model.add(BatchNormalization())
bn_model.add(Dropout(0.5))
bn_model.add(Dense(num_classes, activation='softmax'))

In [32]:
# Compile the rebuilt BN model with the lr=0.001 optimizer currently bound to `opt`.
bn_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [33]:
if data_augmentation:
    print('Using real-time data augmentation.')
    # Preprocessing and real-time augmentation applied to the training images.
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=False  # randomly flip images vertically
    )

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Training batches come from the augmenting generator; validation batches
    # come from a default generator over the test set, unshuffled.
    batches = datagen.flow(x_train, y_train, batch_size=batch_size)
    gen = ImageDataGenerator()
    val_batches = gen.flow(x_test, y_test, batch_size=batch_size, shuffle=False)

    # Fit the BN model on the batches generated by datagen.flow().
    bn_model.fit_generator(
        batches,
        batches.n,
        nb_epoch=epochs,
        validation_data=val_batches,
        nb_val_samples=val_batches.n
    )
else:
    print('Not using data augmentation.')
    bn_model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        nb_epoch=epochs,
        validation_data=(x_test, y_test),
        shuffle=True
    )

Using real-time data augmentation.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


| Model | loss | acc | val_loss | val_acc |
|---|---|---|---|---|
| No BN, lr=0.0001 | 0.8875 | 0.7005 | 0.8007 | 0.7398 |
| BN, lr=0.0001 | 0.6417 | 0.7824 | 0.7124 | 0.7671 |
| No BN, lr=0.001 | 2.1267 | 0.2226 | 2.0676 | 0.2230 |
| BN, lr=0.001 | 0.5938 | 0.8006 | 0.5055 | 0.8264 |