In [1]:
import chainer
import os
import six
import time
import pylab
import pickle, gzip
import numpy as np
import chainer.functions as F
import chainer.links as L
from chainer import serializers
from chainer import optimizers
from matplotlib import gridspec
from ipywidgets import interact, interactive, fixed
%matplotlib inline
    
# Number of training samples. train_model() reads this global to size the
# shuffle permutation, to bound the mini-batch loop and to average the
# training loss/accuracy.
N = 60000
# Number of test samples. Starts at 0 as a placeholder; the data-loading cell
# later rebinds it to labels_test.size before any training is run.
N_test = 0

In [2]:
def load_mnist_data(path, num_train=60000):
    """Load a pickled MNIST dictionary and split it into train/test arrays.

    The pickle must contain a mapping with a ``'data'`` entry (pixel values,
    reshaped here to ``(n, 1, 28, 28)``) and a ``'target'`` entry (labels).

    Args:
        path: Location of the pickle file to read.
        num_train: Index at which the samples are split; the first
            ``num_train`` samples form the training set and the remainder
            forms the test set.

    Returns:
        Tuple ``(input_train, target_train, input_test, target_test)``.
        Inputs are float32 arrays divided by 255; targets are int32 arrays.
    """
    # NOTE(security): unpickling can execute arbitrary code -- only load
    # files that come from a trusted source.
    with open(path, 'rb') as mnist_pickle:
        # 'latin1' decoding; presumably the file was pickled under Python 2,
        # where this encoding is needed to restore NumPy arrays.
        mnist = pickle.load(mnist_pickle, encoding='latin1')

    # Scale pixel values by 1/255 and add an explicit single channel axis.
    data = mnist['data'].astype(np.float32)
    data /= 255
    data = data.reshape(data.shape[0], 1, 28, 28)
    target = mnist['target'].astype(np.int32)

    input_train, input_test = np.split(data, [num_train])
    target_train, target_test = np.split(target, [num_train])
    return input_train, target_train, input_test, target_test

In [3]:
class SimpleCNN(chainer.Chain):
    """Small CNN: one convolution, a ReLU and a linear output layer.

    The linear layer takes 5184 inputs, which equals 9 * 24 * 24 -- the
    flattened convolution output for a 28x28 single-channel image
    (assumed input size; confirm against the data loader).
    """

    def __init__(self):
        conv = L.Convolution2D(1, 9, 5)
        dense = L.Linear(5184, 10)
        super(SimpleCNN, self).__init__(c1=conv, f3=dense)

        # Bookkeeping attributes; not read anywhere in the visible code
        # (presumably used by visualization helpers -- TODO confirm).
        self.num_layers = 4
        self.outputs = [0] * 4

    def __call__(self, x):
        """Run the forward pass and return the output of the linear layer."""
        activated = F.relu(self.c1(x))
        return self.f3(activated)

    def get_weights(self, layer):
        """Return the weight array of the given layer.

        Layer 1 is the convolution and layer 3 is the linear layer; any other
        value raises ValueError because no weights are associated with it.
        """
        if layer == 1:
            return self.c1.W.data
        if layer == 3:
            return self.f3.W.data
        raise ValueError("Layer does not have weights: {}".format(layer))

In [4]:
def train_model(model, batchsize = 100, num_epochs = 20):
    """Train ``model`` with Adam and print per-epoch train/test metrics.

    The data is not passed in: the function reads the notebook globals
    ``images_train``, ``labels_train``, ``images_test``, ``labels_test``,
    ``N`` (number of training samples) and ``N_test`` (number of test
    samples), so those must be defined before calling.

    Args:
        model: Callable as ``model(x, t)`` returning a loss, and exposing
            ``loss`` and ``accuracy`` attributes after each call (the
            notebook passes an ``L.Classifier`` wrapper).
        batchsize: Number of samples per mini-batch.
        num_epochs: Number of passes over the training data.
    """
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    for epoch in six.moves.range(1, num_epochs + 1):
        print('epoch', epoch)

        # training
        perm = np.random.permutation(N)  # fresh sample order every epoch
        sum_accuracy = 0
        sum_loss = 0

        for i in six.moves.range(0, N, batchsize):
            x = chainer.Variable(np.asarray(images_train[perm[i:i + batchsize]]))
            t = chainer.Variable(np.asarray(labels_train[perm[i:i + batchsize]]))

            optimizer.update(model, x, t)

            # Weight by the batch length so a shorter final batch is
            # averaged correctly.
            sum_loss += float(model.loss.data) * len(t.data)
            sum_accuracy += float(model.accuracy.data) * len(t.data)

        print('train mean loss={}, accuracy={}'.format(
                sum_loss / N, sum_accuracy / N))

        # evaluation
        # N_test is initialized to 0 at the top of the notebook; without this
        # guard the averages below would raise ZeroDivisionError when no test
        # set has been configured.
        if N_test > 0:
            sum_accuracy = 0
            sum_loss = 0
            for i in six.moves.range(0, N_test, batchsize):
                x = chainer.Variable(np.asarray(images_test[i:i + batchsize]))
                t = chainer.Variable(np.asarray(labels_test[i:i + batchsize]))

                loss = model(x, t)
                sum_loss += float(loss.data) * len(t.data)
                sum_accuracy += float(model.accuracy.data) * len(t.data)

            print('test  mean loss={}, accuracy={}'.format(sum_loss / N_test, sum_accuracy / N_test))

In [5]:
# Load MNIST once; train_model() reads these four arrays as notebook globals,
# so their names must not change.
images_train, labels_train, images_test, labels_test = load_mnist_data('data/mnist.pkl')
# Rebind the global test-set size (initialized to 0 in the first cell) to the
# real number of test labels; train_model() uses it for the evaluation loop.
N_test = labels_test.size
# Wrap the network in L.Classifier; train_model() calls the wrapper as
# model(x, t) and reads its .loss and .accuracy attributes.
simple_model = L.Classifier(SimpleCNN())
train_model(simple_model, num_epochs = 5)

epoch 1
train mean loss=0.250836377479136, accuracy=0.929533335318168
test  mean loss=0.10808072537649423, accuracy=0.9674000072479249
epoch 2
train mean loss=0.09082891652050118, accuracy=0.973600009183089
test  mean loss=0.07338494185125455, accuracy=0.9787000077962875
epoch 3
train mean loss=0.06521509442711249, accuracy=0.9811166777213415
test  mean loss=0.06055964846280403, accuracy=0.9820000070333481
epoch 4
train mean loss=0.05253319886745885, accuracy=0.9838666767875354
test  mean loss=0.058497425749956165, accuracy=0.9819000089168548
epoch 5
train mean loss=0.043599698273465035, accuracy=0.9865333426992099
test  mean loss=0.052942996900237634, accuracy=0.9839000076055526


In [None]:
class PoolingCNN(chainer.Chain):
    """Small CNN: convolution, 2x2 max pooling, ReLU and a linear output layer.

    The linear layer takes 1296 inputs, which equals 9 * 12 * 12 -- the
    flattened pooled feature maps for a 28x28 single-channel image
    (assumed input size; confirm against the data loader).
    """

    def __init__(self):
        conv = L.Convolution2D(1, 9, 5)
        dense = L.Linear(1296, 10)
        super(PoolingCNN, self).__init__(c1=conv, f3=dense)

        # Bookkeeping attributes; not read anywhere in the visible code
        # (presumably used by visualization helpers -- TODO confirm).
        layer_count = 4
        self.num_layers = layer_count
        self.outputs = [0] * layer_count

    def __call__(self, x):
        """Run the forward pass and return the output of the linear layer."""
        pooled = F.max_pooling_2d(self.c1(x), 2, stride=2)
        return self.f3(F.relu(pooled))

    def get_weights(self, layer):
        """Return the weight array of the given layer.

        Layer 1 is the convolution and layer 3 is the linear layer; any other
        value raises ValueError because no weights are associated with it.
        """
        if layer == 1:
            return self.c1.W.data
        if layer == 3:
            return self.f3.W.data
        raise ValueError("Layer does not have weights: {}".format(layer))

In [None]:
# Train the max-pooling variant under the same settings (5 epochs) as the
# simple model. train_model() takes its data from the notebook globals
# (images_train, labels_train, images_test, labels_test, N, N_test), so the
# data-loading cell must have been executed first.
pooling_model = L.Classifier(PoolingCNN())
train_model(pooling_model, num_epochs = 5)

epoch 1
train mean loss=0.3294397140542666, accuracy=0.9100166686251759
test  mean loss=0.13597849015612154, accuracy=0.9618000036478043
epoch 2
train mean loss=0.1150416758004576, accuracy=0.9671833403905232
test  mean loss=0.08257284199818969, accuracy=0.9761000049114227
epoch 3
train mean loss=0.0825290200393647, accuracy=0.9763500101367633
test  mean loss=0.06976450882502831, accuracy=0.9790000063180924
epoch 4
train mean loss=0.06739653394557536, accuracy=0.9804500110944112
test  mean loss=0.059802431911812164, accuracy=0.9810000079870224
epoch 5
