# CNN from scratch - Keras+TensorFlow

This is for CNN models built from scratch, using Keras based on TensorFlow.
First, some preparation work.

In [None]:
from keras.layers import Conv2D, MaxPooling2D, Input, Dense, Flatten, Activation, add
from keras.layers.core import Dropout
from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import GlobalAveragePooling2D
from keras.optimizers import RMSprop
from keras.models import Model, Sequential
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from lib.data_utils import get_MNIST_data

Read the MNIST data. Notice that we assume that it's 'kaggle-DigitRecognizer/data/train.csv', and we use helper function to read into a dictionary.

In [None]:
# by default, there would be 41000 training data, 1000 test data and 1000 validation data(within traning set)
data = get_MNIST_data()

# see if we get the data correctly
print('image size: ', data['X_train'].shape)

## Simple CNN model

Build a simple CNN model using Keras and then train from scratch.

In [None]:
# model architecture
# [batchnorm-Conv-Conv-maxpool]x2 - [dense]x2 - [softmax]
simple_CNN = Sequential()
simple_CNN.add(BatchNormalization(input_shape=(28, 28, 1)))
simple_CNN.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
simple_CNN.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
simple_CNN.add(MaxPooling2D((2, 2))) # (14,14,32)
simple_CNN.add(Dropout(0.25))

simple_CNN.add(BatchNormalization())
simple_CNN.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
simple_CNN.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
simple_CNN.add(MaxPooling2D((2, 2))) # (7,7,64)
simple_CNN.add(Dropout(0.25))

simple_CNN.add(Flatten())
simple_CNN.add(Dense(1024, activation='relu'))
simple_CNN.add(Dropout(0.5))
simple_CNN.add(Dense(10, activation='softmax'))

# set loss and optimizer
rmsprop = RMSprop(lr=0.001, decay=0.99)
simple_CNN.compile(loss='sparse_categorical_crossentropy', optimizer=rmsprop, metrics=['accuracy'])

# train the model
simple_CNN.fit(data['X_train'], data['y_train'].reshape(-1,1), batch_size=64, epochs=10)

# test the model and see accuracy
score = simple_CNN.evaluate(data['X_test'], data['y_test'].reshape(-1, 1), batch_size=64)
print(score)

In [None]:
# save the model: 0.822
simple_CNN.save('simple_CNN.h5')

## Mini ResNet

Build the small ResNet with 22 layers using Keras and train from scratch.

In [None]:
# model architecture
# [Conv-batchnorm-relu]x4 - [residual: [Conv-batchnorm-relu]x2-Conv-batchnorm-add-relu]x6
# 4
inputs = Input(shape=(28, 28, 1))
x = Conv2D(64, (7, 7), padding='same')(inputs)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Conv2D(64, (1, 1), padding='same')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Conv2D(64, (3, 3), padding='same')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Conv2D(256, (1, 1), padding='same')(x)
x = BatchNormalization()(x)
res = MaxPooling2D((2, 2))(x) # (14, 14, 64)

# repeated residual modules
for i in range(6): # 6x3 = 18
    x = Conv2D(64, (1, 1), padding='same')(res)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(64, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(256, (1, 1), padding='same')(x)
    x = BatchNormalization()(x)
    x = add([x, res])
    res = Activation('relu')(x)

x = GlobalAveragePooling2D(data_format='channels_last')(res) #(,256)
predictions = Dense(10, activation='softmax')(x)

# connect the model
mini_ResNet = Model(inputs=inputs, outputs=predictions)

# set loss and optimizer
rmsprop = RMSprop(lr=0.1, decay=0.9999)
mini_ResNet.compile(loss='sparse_categorical_crossentropy', optimizer=rmsprop, metrics=['accuracy'])

# train the model
checkpoint = ModelCheckpoint('miniResNet_{epoch:02d}-{accuracy:.2f}.h5',
                             monitor='accuracy',
                             save_best_only=True)
plateau = ReduceLROnPlateau(factor=0.1, patience=3, min_lr=0.0001)
mini_ResNet.fit(data['X_train'], data['y_train'].reshape(-1, 1), 
                batch_size=32, epochs=10,
                callbacks=[checkpoint, plateau])

# test the model and see accuracy
score = mini_ResNet.evaluate(data['X_test'], data['y_test'].reshape(-1, 1), batch_size=32)
print(score)

In [None]:
# save the model: 0.903
mini_ResNet.save('mini_ResNet.h5')

## Simple CNN with residual connections

Inspired by ResNet, we try to add residual connections to the simple CNN model above and see if there exists difference of performance.

In [None]:
# model architecture
# [Conv] - [batchnorm-Conv-Conv-add-maxpool]x2 - [dense]x2 - [softmax]
inputs = Input(shape=(28,28,1))
x = Conv2D(64, (7, 7), activation='relu', padding='same')(inputs)

res = BatchNormalization()(x) # (28, 28, 64)
x = Conv2D(64, (3, 3), activation='relu', padding='same')(res)
x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
x = add([res, x])
x = MaxPooling2D((2, 2))(x)

res = BatchNormalization()(x) # (14, 14, 64)
x = Conv2D(64, (3, 3), activation='relu', padding='same')(res)
x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
x = add([res, x])
x = MaxPooling2D((2, 2))(x)

x = GlobalAveragePooling2D(data_format='channels_last')(x)
predictions = Dense(10, activation='softmax')(x)

simple_resCNN = Model(inputs=inputs,outputs=predictions)

# set loss and optimizer
rmsprop = RMSprop(lr=0.001, decay=0.99)
simple_resCNN.compile(loss='sparse_categorical_crossentropy', optimizer=rmsprop, metrics=['accuracy'])

# train the model
simple_resCNN.fit(data['X_train'], data['y_train'].reshape(-1,1), batch_size=64, epochs=10)

# test the model and see accuracy
score = simple_resCNN.evaluate(data['X_test'], data['y_test'].reshape(-1, 1), batch_size=64)
print(score)

## Create submissions

Load the saved trained models and produce predictions for submission on Kaggle.

In [1]:
from lib.data_utils import create_submission
from keras.models import load_model

# for simple CNN model
simple_CNN = load_model('../models/mini_ResNet.h5')
print('Load model successfully.')
create_submission(simple_CNN, '../data/test.csv', '../submission/submission_miniResNet.csv', 1000)

Load model successfully.

Using TensorFlow backend.


KeyboardInterrupt: 