# MNIST CNN (precompute conv output)

In [1]:
# Bind Theano's sandbox CUDA backend to the device named 'gpu0' before any model code runs.
# NOTE(review): the captured output links to Theano's "Converting to the new gpu back end
# (gpuarray)" wiki page - presumably a deprecation notice for this backend; confirm.
from theano.sandbox import cuda
cuda.use('gpu0')

 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: GeForce GTX 950 (CNMeM is enabled with initial size: 90.0% of memory, cuDNN 5110)
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29



In [2]:
# Future statements have to be the first statement of a compilation unit. Keeping this
# line first lets the cell also run when the notebook is exported to a plain .py script
# (in the middle of a module it is a SyntaxError).
from __future__ import division, print_function

#%matplotlib inline
# Re-import the local helper module so that edits to utils.py are picked up without
# restarting the kernel, then pull its names into the notebook namespace.
import utils; reload(utils)
from utils import *

Using Theano backend.


## Setup

In [3]:
# Mini-batch size, defined once here and referenced by training calls further down.
batch_size = 64

In [4]:
# Load the MNIST train/test split through Keras' dataset helper, then display the four
# array shapes as a sanity check (the tuple on the last line is the cell's output).
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [5]:
# Insert a singleton channel axis at position 1: (N, 28, 28) -> (N, 1, 28, 28), matching
# the input_shape=(1,28,28) declared by the model's first layer.
# The ndim guard keeps the cell idempotent: without it, running the cell a second time
# would stack another axis onto the already-expanded arrays.
if X_test.ndim == 3:
    X_test = np.expand_dims(X_test, 1)
if X_train.ndim == 3:
    X_train = np.expand_dims(X_train, 1)

In [6]:
# Sanity check: the training images should now carry the extra channel axis.
X_train.shape

(60000, 1, 28, 28)

In [7]:
# Peek at the first five labels while they are still integer class ids.
y_train[:5]

array([5, 0, 4, 1, 9], dtype=uint8)

In [8]:
# One-hot encode the integer class labels, as needed by the categorical cross-entropy
# loss the models are compiled with.
# The ndim guard keeps the cell idempotent: labels that are already 2-D (i.e. already
# encoded by a previous run of this cell) are left untouched instead of being re-encoded.
if y_train.ndim == 1:
    y_train = onehot(y_train)
if y_test.ndim == 1:
    y_test = onehot(y_test)

In [9]:
# Same five labels as before, now displayed in their one-hot encoded form.
y_train[:5]

array([[ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]])

In [10]:
# Global pixel mean and standard deviation over the training images, cast to float32
# scalars; norm_input uses them to standardize the network input.
mean_px = np.float32(X_train.mean())
std_px = np.float32(X_train.std())

In [11]:
def norm_input(x):
    """Standardize ``x`` with the training-set pixel statistics.

    Subtracts the module-level ``mean_px`` and divides by ``std_px``; both must be
    defined before this function is called.
    """
    centered = x - mean_px
    return centered / std_px

## Conv + Linear model

In [12]:
def get_conv_model(lr=0.001):
    """Build and compile the two-block convolutional MNIST classifier.

    The network standardizes its input with ``norm_input``, applies two
    (zero-pad -> 3x3 conv + ReLU -> max-pool) blocks with 32 and 64 filters, and ends
    in a 512-unit dense layer with dropout followed by a 10-way softmax.

    Args:
        lr: learning rate passed to the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        accuracy metric).
    """
    # Input stage: declares the (1, 28, 28) input shape and normalizes the pixels.
    preprocess = [Lambda(norm_input, input_shape=(1,28,28))]

    conv_block_1 = [
        ZeroPadding2D((1, 1)),
        Convolution2D(32, 3, 3, activation='relu'),
        MaxPooling2D(),
    ]
    conv_block_2 = [
        ZeroPadding2D((1, 1)),
        Convolution2D(64, 3, 3, activation='relu'),
        MaxPooling2D(),
    ]

    classifier = [
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax'),
    ]

    model = Sequential(preprocess + conv_block_1 + conv_block_2 + classifier)
    model.compile(optimizer=Adam(lr=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [13]:
# Build the base CNN; the learning rate is spelled out even though it equals the default.
model = get_conv_model(lr=0.001)

  .format(self.name, input_shape))


In [14]:
# Print the layer-by-layer output shapes and parameter counts of the base CNN.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lambda_1 (Lambda)                (None, 1, 28, 28)     0           lambda_input_1[0][0]             
____________________________________________________________________________________________________
zeropadding2d_1 (ZeroPadding2D)  (None, 1, 30, 30)     0           lambda_1[0][0]                   
____________________________________________________________________________________________________
convolution2d_1 (Convolution2D)  (None, 32, 28, 28)    320         zeropadding2d_1[0][0]            
____________________________________________________________________________________________________
maxpooling2d_1 (MaxPooling2D)    (None, 32, 14, 14)    0           convolution2d_1[0][0]            
___________________________________________________________________________________________

In [15]:
# Generator created with no augmentation options; it is only used to serve mini-batches.
gen = image.ImageDataGenerator()
# Use the notebook-level batch_size rather than repeating the literal 64, so the batch
# size is controlled from one place.
# gen.flow returns a keras.preprocessing.image.NumpyArrayIterator.
batches = gen.flow(X_train, y_train, batch_size=batch_size)
# Validation batches keep the original sample order (shuffle=False).
test_batches = gen.flow(X_test, y_test, batch_size=batch_size, shuffle=False)

In [16]:
# keras.preprocessing.image.NumpyArrayIterator has no `nb_sample`; the number of
# samples is read from `.n` instead, which is what fit_generator is given below.
batches.n

60000

In [17]:
# Train for one epoch. Both epoch sizes are expressed in samples, so the iterators'
# sample counts (.n) are passed straight through.
model.fit_generator(
    batches,
    samples_per_epoch=batches.n,
    nb_epoch=1,
    validation_data=test_batches,
    nb_val_samples=test_batches.n)

Epoch 1/1


<keras.callbacks.History at 0x7f05e82233d0>

In [18]:
# Checkpoint the trained weights; the file name records the learning rate used.
model.save_weights('results/mnist-lr0.001.h5')

In [17]:
# Restore the checkpoint written by the save_weights cell above.
# NOTE(review): `model` must already exist (built by get_conv_model) before this runs;
# presumably this cell is the resume point that lets a new session skip training - confirm.
model.load_weights('results/mnist-lr0.001.h5')

## Precompute convolution output

In [19]:
# Split the trained model's layers into a convolutional part and the remaining layers.
# NOTE(review): split_at comes from utils (not shown here). Judging by the
# conv_layers[-1].output_shape printed further down, (None, 64, 14, 14), conv_layers
# ends at the second Convolution2D - confirm against utils.
conv_layers, fc_layers = split_at(model, Convolution2D)

In [20]:
# Wrap the convolutional layers in their own Sequential model so that their output can
# be computed separately from the rest of the network.
conv_model = Sequential(conv_layers)

In [21]:
# Print the layer table of the convolution-only model.
conv_model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lambda_1 (Lambda)                (None, 1, 28, 28)     0           lambda_input_1[0][0]             
____________________________________________________________________________________________________
zeropadding2d_1 (ZeroPadding2D)  (None, 1, 30, 30)     0           lambda_1[0][0]                   
                                                                   lambda_1[0][0]                   
____________________________________________________________________________________________________
convolution2d_1 (Convolution2D)  (None, 32, 28, 28)    320         zeropadding2d_1[0][0]            
                                                                   zeropadding2d_1[1][0]            
___________________________________________________________________________________________

In [22]:
# Run the convolution-only model once over the training and test images. The resulting
# feature arrays are what the FC-only and batch-norm models below are trained on.
conv_feat = conv_model.predict(X_train)
conv_val_feat = conv_model.predict(X_test)

In [23]:
# Sanity check: show the shapes of the precomputed training and validation features.
(conv_feat.shape, conv_val_feat.shape)

((60000, 64, 14, 14), (10000, 64, 14, 14))

In [24]:
# Store the precomputed features under results/ via the save_array helper so the
# predict step does not have to be repeated (file names record the learning rate).
save_array('results/mnist_conv_feat_lr0.001.dat', conv_feat)
save_array('results/mnist_conv_val_feat_lr0.001.dat', conv_val_feat)

In [21]:
# Read back the feature arrays written by the save_array cell above (same paths).
# NOTE(review): presumably the entry point for sessions that skip the predict step - confirm.
conv_feat = load_array('results/mnist_conv_feat_lr0.001.dat')
conv_val_feat = load_array('results/mnist_conv_val_feat_lr0.001.dat')

In [25]:
# Output shape of the last convolutional layer; everything after the leading entry is
# used as input_shape by the layer lists defined below.
conv_layers[-1].output_shape

(None, 64, 14, 14)

## FC only

In [26]:
def get_fc_model(p):
    """Return the layers of the classifier head fed with precomputed conv features.

    Note that, despite the name, the result is a plain list of layers rather than a
    model; callers wrap it in ``Sequential``.

    Args:
        p: dropout rate of the Dropout layer placed before the softmax output.

    Returns:
        ``[MaxPooling2D, Flatten, Dense(512, relu), Dropout(p), Dense(10, softmax)]``.
    """
    # Input shape = output shape of the last conv layer minus its leading (batch) entry.
    feature_shape = conv_layers[-1].output_shape[1:]
    # The first layer of a Sequential model has to declare its input_shape.
    pooling = MaxPooling2D(input_shape=feature_shape)
    hidden = Dense(512, activation='relu')
    output = Dense(10, activation='softmax')
    return [pooling, Flatten(), hidden, Dropout(p), output]

### Learning rate=0.0001, Dropout=0.5

In [27]:
# FC-only baseline: dropout 0.5, optimized with Adam at learning rate 0.0001.
fc_model = Sequential(get_fc_model(0.5))
fc_model.compile(
    optimizer=Adam(lr=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'])

In [28]:
# Train the FC-only head on the precomputed conv features for 8 epochs, validating on
# the precomputed test-set features. The notebook-level batch_size is used instead of a
# repeated literal 64, matching the bn_model.fit call further down.
fc_model.fit(conv_feat, y_train, batch_size=batch_size, nb_epoch=8,
             validation_data=(conv_val_feat, y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f05d1e77e10>

## Add batch normalization

In [29]:
def get_bn_layers(p):
    """Return the classifier-head layers with batch normalization added.

    Same structure as the FC-only head, plus one BatchNormalization after the pooling
    layer and one after the 512-unit dense layer.

    Args:
        p: dropout rate of the Dropout layer placed before the softmax output.

    Returns:
        A list of layers, to be wrapped in ``Sequential`` by the caller.
    """
    # Input shape = output shape of the last conv layer minus its leading (batch) entry.
    feature_shape = conv_layers[-1].output_shape[1:]

    head = [MaxPooling2D(input_shape=feature_shape)]
    # axis=1 is passed explicitly here because the pooled feature maps are still 4-D.
    head.append(BatchNormalization(axis=1))
    head += [
        Flatten(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(p),
    ]
    head.append(Dense(10, activation='softmax'))
    return head

### Learning rate=0.0001, Dropout=0.5

In [30]:
# Batch-norm variant of the head: dropout 0.5, optimized with Adam at learning rate 0.0001.
bn_model = Sequential(get_bn_layers(0.5))
bn_model.compile(
    optimizer=Adam(lr=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'])

In [31]:
# Train the batch-norm head on the precomputed conv features for 8 epochs, validating
# on the precomputed test-set features.
bn_model.fit(
    conv_feat, y_train,
    batch_size=batch_size,
    nb_epoch=8,
    validation_data=(conv_val_feat, y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f05c8db9c50>

| | loss | acc | val_loss | val_acc |
|---|---|---|---|---|
| lr=0.0001, dropout=0.5  | 0.0170 | 0.9949 | 0.0226 | 0.9918 |
| BN, lr=0.0001, dropout=0.5  | 0.0144 | 0.9955 | 0.0237 | 0.9923 |