In [1]:
import chainer

In [2]:
# Record the installed Chainer / NumPy / CuPy versions so results can be reproduced.
chainer.print_runtime_info()

Chainer: 4.1.0
NumPy: 1.14.2
CuPy: Not Available


In [3]:
import numpy as np
import chainer
from chainer.backends import cuda
from chainer import Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions

from chainer.datasets import split_dataset_random

In [4]:
import matplotlib.pyplot as plt
%matplotlib inline

## Load MNIST Dataset

Load the dataset and check its size and data type.

In [5]:
from chainer.datasets import mnist

# get_mnist() hands back two datasets: the first one is later divided into
# training and validation subsets, the second one is kept as the test set.
train_valid, test = mnist.get_mnist()

In [6]:
# Container type of each dataset returned by get_mnist().
print("train_valid: %s" % (type(train_valid),))
print("test: %s" % (type(test),))

# Number of samples held by each dataset.
print("# of train_valid: %s" % (len(train_valid),))
print("# of test: %s" % (len(test),))

train_valid: <class 'chainer.datasets.tuple_dataset.TupleDataset'>
test: <class 'chainer.datasets.tuple_dataset.TupleDataset'>
# of train_valid: 60000
# of test: 10000


In [29]:
# Unpack the first sample: x0 is the flat input vector, y0 is presumably
# its class label.
x0, y0 = train_valid[0]
print("size of x0: %s\n" % (len(x0),))

# NOTE(review): this prints every element of x0; a summary or an image plot
# would be easier to read than the raw values.
print(x0)
print(y0)

size of x0: 784

[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.   

### Split dataset

Split the dataset so that train-set : validation-set : test-set is 50,000 : 10,000 : 10,000 samples (roughly 71% : 14% : 14%).

Ref. https://www.youtube.com/watch?v=M3qpIzy4MQk

In [8]:
# The first 50,000 randomly chosen samples form the training set and the
# remainder the validation set; a fixed seed is passed for the random split.
train, valid = split_dataset_random(train_valid, first_size=50000, seed=0)

In [9]:
# Confirm the size of every split after the random partitioning.
for caption, subset in (
    ("# of training dataset: ", train),
    ("# of validation dataset: ", valid),
    ("# of test dataset: ", test),
):
    print(caption, len(subset))

# of training dataset:  50000
# of validation dataset:  10000
# of test dataset:  10000


## Prepare iterators


In [10]:
# Mini-batch size shared by all three iterators.
batch_size = 128

# The training iterator keeps SerialIterator's default repeat/shuffle settings.
train_iter = iterators.SerialIterator(dataset=train, batch_size=batch_size)

# Evaluation iterators make a single, unshuffled pass over their dataset.
valid_iter = iterators.SerialIterator(
    dataset=valid, batch_size=batch_size, repeat=False, shuffle=False)
test_iter = iterators.SerialIterator(
    dataset=test, batch_size=batch_size, repeat=False, shuffle=False)

## Build Logistic Regression Model

To get familiar with Chainer, let's build a simple logistic regression model.

In [11]:
class LogisticRegression(Chain):
    """A single fully connected layer mapping an input to ``n_out`` outputs.

    The forward pass returns the raw output of the linear layer; no
    activation is applied inside this chain.
    """

    def __init__(self, n_out=10):
        """Create the linear layer.

        Args:
            n_out: Number of output units of the layer.
        """
        super().__init__()

        # The input size is passed as None rather than fixed here.
        with self.init_scope():
            self.fc = L.Linear(in_size=None, out_size=n_out)

    def __call__(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        logits = self.fc(x)
        return logits

## Create trainer

In [24]:
def create_trainer(model, train_iter, valid_iter, max_epoch=10, gpu_id=-1,
                   out='.out/mnist_result'):
    """Build a Trainer that optimises ``model`` with SGD and reports per epoch.

    Args:
        model: Link to optimise. When it exposes a ``predictor`` attribute
            (as ``L.Classifier`` does), that inner link is what gets saved
            by ``snapshot_object``; otherwise the model itself is saved.
        train_iter: Iterator handed to the updater for training batches.
        valid_iter: Iterator handed to the Evaluator extension.
        max_epoch: Number of epochs after which training stops.
        gpu_id: Device id for the updater and the evaluator. The model is
            moved to the GPU only when this is non-negative.
        out: Output directory given to the Trainer. Pass a different
            directory per experiment so that successive runs do not
            overwrite each other's snapshots, logs and plots.

    Returns:
        The configured ``training.Trainer``; call ``run()`` on it to train.
    """
    if gpu_id >= 0:
        model.to_gpu(gpu_id)

    optimizer = optimizers.SGD()
    optimizer.setup(model)

    updater = training.updater.StandardUpdater(train_iter, optimizer, device=gpu_id)
    trainer = training.Trainer(updater, (max_epoch, 'epoch'), out=out)

    # Fall back to the model itself when it is not wrapped in a Classifier,
    # instead of failing with AttributeError on ``model.predictor``.
    predictor = getattr(model, 'predictor', model)

    # Extensions are registered in the same order as before.
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))
    trainer.extend(extensions.snapshot_object(predictor, filename='model_epoch-{.updater.epoch}'))
    trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu_id))
    trainer.extend(extensions.PrintReport([
        'epoch',
        'main/loss',
        'main/accuracy',
        'validation/main/loss',
        'validation/main/accuracy',
        'elapsed_time',
    ]))
    trainer.extend(extensions.PlotReport(
        ['main/loss', 'validation/main/loss'], x_key='epoch', file_name='loss.png'))
    trainer.extend(extensions.PlotReport(
        ['main/accuracy', 'validation/main/accuracy'], x_key='epoch', file_name='accuracy.png'))
    trainer.extend(extensions.dump_graph('main/loss'))

    return trainer

In [20]:
# A negative device id makes create_trainer skip model.to_gpu().
gpu_id = -1

# Wrap the network in L.Classifier without rebinding `model` twice.
model = L.Classifier(LogisticRegression())

# Pass gpu_id explicitly so training uses the same device as the
# Evaluator cells, which are given device=gpu_id.
trainer = create_trainer(model, train_iter, valid_iter, max_epoch=50, gpu_id=gpu_id)

In [15]:
# Run training until the trainer's epoch limit is reached.
trainer.run()

epoch       main/loss   main/accuracy  validation/main/loss  validation/main/accuracy  elapsed_time
[J1           1.3489      0.704803       0.894117              0.825752                  1.63583       
[J2           0.764448    0.840813       0.668779              0.852848                  3.72292       
[J3           0.620556    0.857492       0.577383              0.865012                  5.74692       
[J4           0.55185     0.867148       0.526616              0.870945                  7.7762        
[J5           0.509772    0.873561       0.493186              0.875494                  9.82244       
[J6           0.481613    0.878105       0.469602              0.878758                  11.8938       
[J7           0.460492    0.882013       0.451425              0.881527                  14.1173       
[J8           0.443764    0.884956       0.437176              0.883999                  16.5765       
[J9           0.430723    0.887228       0.425203          

## Evaluate Logistic Regression with test-set

In [18]:
# Score the trained model on the held-out test set; calling the evaluator
# returns a mapping of reported values keyed by name.
test_evaluator = extensions.Evaluator(iterator=test_iter, target=model, device=gpu_id)
results = test_evaluator()
print('Test accuracy:', results['main/accuracy'])

Test accuracy: 0.9152492088607594


## Build Multi-Layer Perceptron

Let's build a multi-layer perceptron (MLP) model to achieve more than 91% accuracy.

In [16]:
class MLP(Chain):
    """Three linear layers with ReLU after the first two.

    The output of the last layer is returned without an activation.
    """

    def __init__(self, n_mid_units=100, n_out=10):
        """Create the three linear layers.

        Args:
            n_mid_units: Number of units in each of the two hidden layers.
            n_out: Number of output units of the final layer.
        """
        super().__init__()

        # Every layer is created with its input size passed as None.
        with self.init_scope():
            self.l1 = L.Linear(in_size=None, out_size=n_mid_units)
            self.l2 = L.Linear(in_size=None, out_size=n_mid_units)
            self.l3 = L.Linear(in_size=None, out_size=n_out)

    def __call__(self, x):
        """Run ``x`` through l1 -> ReLU -> l2 -> ReLU -> l3."""
        hidden = F.relu(self.l1(x))
        hidden = F.relu(self.l2(hidden))
        return self.l3(hidden)

In [26]:
# Wrap a fresh MLP in L.Classifier without rebinding `model` twice.
model = L.Classifier(MLP())

# Rewind the iterators used by the previous training / evaluation run.
train_iter.reset()
valid_iter.reset()
test_iter.reset()

# Pass gpu_id explicitly so training uses the same device as the
# Evaluator cells, which are given device=gpu_id.
trainer = create_trainer(model, train_iter, valid_iter, max_epoch=50, gpu_id=gpu_id)
trainer.run()

epoch       main/loss   main/accuracy  validation/main/loss  validation/main/accuracy  elapsed_time
[J1           1.76843     0.556666       1.03211               0.794798                  3.21304       
[J2           0.712873    0.836057       0.530062              0.86165                   6.80991       
[J3           0.468432    0.877444       0.419703              0.881527                  10.5739       
[J4           0.393125    0.892223       0.369035              0.896855                  14.3409       
[J5           0.356473    0.900116       0.343895              0.900712                  17.9549       
[J6           0.332784    0.905509       0.32355               0.907832                  22.0274       
[J7           0.31558     0.909187       0.309192              0.911491                  25.7009       
[J8           0.302158    0.914203       0.299162              0.914854                  29.53         
[J9           0.290273    0.91696        0.286628          

## Evaluate MLP with test-set

In [25]:
# Negative device id, the same value used when the trainer was created.
gpu_id = -1

# Score the trained model on the held-out test set; calling the evaluator
# returns a mapping of reported values keyed by name.
test_evaluator = extensions.Evaluator(iterator=test_iter, target=model, device=gpu_id)
results = test_evaluator()
print('Test accuracy:', results['main/accuracy'])

Test accuracy: 0.9636075949367089


## References
- [Chainer Guides](https://docs.chainer.org/en/stable/guides/index.html)
- [Chainer v4 ビギナー向けチュートリアル](https://qiita.com/mitmul/items/1e35fba085eb07a92560#%E5%AD%A6%E7%BF%92%E3%83%AB%E3%83%BC%E3%83%97%E3%82%92%E6%9B%B8%E3%81%84%E3%81%A6%E3%81%BF%E3%82%88%E3%81%86)
