In [163]:
import keras
from keras.layers import *
from keras.models import *
from keras.optimizers import *
from keras.callbacks import *
from keras.preprocessing.image import *

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [165]:
def model_as_svg(m):
    """Render the layer graph of a Keras model as an SVG image.

    Args:
        m: the model to draw.

    Returns:
        An ``IPython.display.SVG`` wrapping the output of Graphviz ``dot``
        for the model graph, with tensor shapes shown on every layer.
    """
    # Both imports are kept local to this helper, as in the rest of the
    # notebook nothing else needs them.
    from IPython.display import SVG
    from keras.utils.vis_utils import model_to_dot

    graph = model_to_dot(m, show_shapes=True)
    svg_bytes = graph.create(prog='dot', format='svg')
    return SVG(svg_bytes)

In [166]:
# Number of target classes; passed as `num_classes` when the integer labels
# are converted to one-hot vectors.
N_CLASSES = 10

In [167]:
# load data
(X_train, y_train), (X_test, y_test) = keras.datasets.cifar10.load_data()

# work in float32 so the in-place normalisation below is well defined
X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)

# convert ys to one-hots
y_train = keras.utils.to_categorical(y_train, num_classes=N_CLASSES)
y_test = keras.utils.to_categorical(y_test, num_classes=N_CLASSES)

# normalize data
#
# Both statistics are estimated on the training split only and then applied
# unchanged to the test split, so no information about the held-out data
# leaks into the preprocessing.

# mean image of the training set (mean over the sample axis)
mean_X = np.mean(X_train, axis=0)
X_train -= mean_X
X_test -= mean_X

# single scalar standard deviation of the (already centred) training set
std_X = np.std(X_train)
X_train /= std_X
X_test /= std_X

In [172]:
def build_plain_cnn_1(input_shape=(32, 32, 3), n_clases=10):
    """Build a plain (non-residual) convolutional classifier.

    The network normalises its input with a BatchNormalization layer, then
    runs four stages of two 3x3 Conv -> BatchNorm -> ReLU units followed by
    2x2 max pooling (64, 128, 256 and 512 filters), and finishes with two
    1024-unit ReLU dense layers and a softmax output layer.

    Args:
        input_shape: shape of a single input sample.
        n_clases: number of units in the softmax output layer.

    Returns:
        An uncompiled Keras ``Model`` mapping the input to class
        probabilities.
    """
    inputs = Input(shape=input_shape, name='input')

    features = BatchNormalization()(inputs)

    # Four convolutional stages; every stage ends by halving the resolution.
    for n_filters in (64, 128, 256, 512):
        for _ in range(2):
            features = Conv2D(n_filters, (3, 3), padding='same')(features)
            features = BatchNormalization()(features)
            features = Activation('relu')(features)
        features = MaxPooling2D((2, 2))(features)

    # Classifier head.
    features = Flatten()(features)
    for _ in range(2):
        features = Dense(1024, activation='relu')(features)
    probabilities = Dense(n_clases, activation='softmax')(features)

    return Model(inputs=[inputs], outputs=[probabilities])

In [173]:
def _residual_block(y, filters, name, downsample=False):
    """Append one two-convolution residual block to the tensor ``y``.

    Args:
        y: input tensor of the block (also the source of the shortcut).
        filters: number of filters of both 3x3 convolutions.
        name: name prefix; the two convolutions are named ``name + 'a'``
            and ``name + 'b'``.
        downsample: when True the first convolution uses stride 2 and the
            shortcut is projected with a stride-2 3x3 convolution followed
            by batch normalisation, so both branches have matching shapes.

    Returns:
        The output tensor of the block (after the final ReLU).
    """
    strides = (2, 2) if downsample else (1, 1)

    # Residual branch.
    # NOTE(review): the reference ResNet design applies the ReLU only after
    # the addition; confirm whether the extra ReLU before the Add is
    # intentional.
    x = Conv2D(
        filters, (3, 3), strides=strides, padding='same', name=name + 'a')(y)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(filters, (3, 3), padding='same', name=name + 'b')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Shortcut branch: identity unless the block changes shape.
    if downsample:
        y = Conv2D(filters, (3, 3), strides=(2, 2), padding='same')(y)
        y = BatchNormalization()(y)

    y = Add()([x, y])
    return Activation('relu')(y)


def build_resnet18(input_size=(32, 32, 3), n_classes=10):
    """Build a ResNet-18 style classifier.

    Layout: a 7x7 stride-2 convolution stem with 2x2 max pooling, then four
    stages of two residual blocks each (64, 128, 256 and 512 filters; every
    stage but the first halves the resolution in its first block), average
    pooling down to 1x1 when needed, and a softmax output layer.

    Args:
        input_size: shape of a single input sample.
        n_classes: number of units in the softmax output layer.

    Returns:
        An uncompiled Keras ``Model`` mapping the input to class
        probabilities.
    """
    i = Input(shape=input_size, name='input')

    # Stem.
    x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', name='conv_0')(i)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    y = MaxPooling2D((2, 2))(x)

    # (filters, first block downsamples?) for each of the four stages.
    stages = ((64, False), (128, True), (256, True), (512, True))
    for filters, downsample in stages:
        y = _residual_block(
            y, filters, 'conv_%d_1' % filters, downsample=downsample)
        y = _residual_block(y, filters, 'conv_%d_2' % filters)

    # Add average pooling as needed to bring the feature map down to 1x1
    # (for a 224x224 ImageNet input the feature map here is 7x7; for a 32x32
    # CIFAR input it is already 1x1).
    y_filter_size = y.shape.as_list()[1:3]
    if max(y_filter_size) > 1:
        # Pool `y` itself so that the pooled tensor is what gets flattened.
        y = AveragePooling2D(tuple(y_filter_size))(y)

    x = Flatten()(y)
    x = Dense(n_classes, activation='softmax')(x)

    return Model(inputs=[i], outputs=[x])

In [194]:
# Pick the architecture to train (swap the comment to try the plain CNN).
model = build_resnet18()
# model = build_plain_cnn_1()

model.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(lr=1e-3),
    metrics=['accuracy'])

AUGMENTATION = True
BATCH_SIZE = 256
EPOCHS = 100
# Largest multiple of BATCH_SIZE that fits in the training set, so every
# step of an epoch sees a full batch.
SAMPLES_PER_EPOCH = len(X_train) // BATCH_SIZE * BATCH_SIZE
print('SAMPLES_PER_EPOCH:', SAMPLES_PER_EPOCH, 'of', len(X_train))

# NOTE(review): the test split is passed as validation data below and drives
# both callbacks, so the reported test accuracy is used for model selection;
# consider holding out a separate validation split.
callbacks = [
    # Shrink the learning rate when validation accuracy stops improving.
    ReduceLROnPlateau(
        factor=0.333,
        cooldown=0,
        patience=2,
        min_lr=1e-9,
        verbose=True,
        monitor='val_acc'),
    # Stop once validation accuracy has not improved by at least 0.001 for
    # 10 epochs.
    EarlyStopping(
        min_delta=0.001, patience=10, monitor='val_acc', verbose=True)
]

if AUGMENTATION:

    # Random shifts and horizontal flips only; the data is already
    # normalised, so every statistics-based option stays off.
    g = ImageDataGenerator(
        featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=0.,
        width_shift_range=0.25,
        height_shift_range=0.25,
        horizontal_flip=True,
        vertical_flip=False)

    # No g.fit(X_train) here: fitting is only required when featurewise
    # centering / std normalisation or ZCA whitening is enabled, and all of
    # them are disabled above.

    model.fit_generator(
        g.flow(X_train, y_train, batch_size=BATCH_SIZE, shuffle=True),
        steps_per_epoch=SAMPLES_PER_EPOCH // BATCH_SIZE,
        validation_data=(X_test, y_test),
        epochs=EPOCHS,
        verbose=1,
        max_queue_size=100,
        callbacks=callbacks)

else:

    model.fit(
        X_train,
        y_train,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        validation_data=(X_test, y_test),
        shuffle=True,
        callbacks=callbacks)

SAMPLES_PER_EPOCH: 49920 of 50000
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 00019: reducing learning rate to 0.0003330000158166513.
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 00026: reducing learning rate to 0.00011088900119648315.
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 00039: reducing learning rate to 3.692603672971018e-05.
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 00046: reducing learning rate to 1.2296370608964936e-05.
Epoch 48/100
Epoch 49/100
Epoch 00048: reducing learning rate to 4.094691272257478e-06.
Epoch 50/100
Epoch 51/100


Epoch 58/100
Epoch 59/100
Epoch 00058: reducing learning rate to 1.512007213193556e-07.
Epoch 60/100
Epoch 61/100
Epoch 00060: reducing learning rate to 5.03498407766756e-08.
Epoch 00060: early stopping
