## Imports

In [2]:
from __future__ import print_function
import time
import os
import numpy as np
import six
 
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import cuda
from chainer import serializers

## Define Network and loss function

In [3]:
class MLP(chainer.Chain):
    """Multi Layer Perceptron: two ReLU hidden layers and a linear output layer."""
    def __init__(self, n_units, n_out):
        # Every input size is given as None so that it is inferred
        # instead of being declared here.
        first_hidden = L.Linear(None, n_units)   # n_in -> n_units
        second_hidden = L.Linear(None, n_units)  # n_units -> n_units
        output_layer = L.Linear(None, n_out)     # n_units -> n_out
        super(MLP, self).__init__(
            l1=first_hidden,
            l2=second_hidden,
            l3=output_layer,
        )

    def __call__(self, x):
        """Apply l1 and l2 with ReLU, then l3 without an activation."""
        hidden = F.relu(self.l1(x))
        hidden = F.relu(self.l2(hidden))
        return self.l3(hidden)
    
class SoftmaxClassifier(chainer.Chain):
    """Wrap a predictor and compute the classification loss from its output.

    The predictor's output is passed directly to
    ``F.softmax_cross_entropy`` and ``F.accuracy``. After each call the
    latest values are kept in the ``loss`` and ``accuracy`` attributes.
    """
    def __init__(self, predictor):
        super(SoftmaxClassifier, self).__init__(predictor=predictor)

    def __call__(self, x, t):
        """Return the softmax cross entropy of ``predictor(x)`` against ``t``."""
        prediction = self.predictor(x)
        loss = F.softmax_cross_entropy(prediction, t)
        self.loss = loss
        self.accuracy = F.accuracy(prediction, t)
        return loss


## Model instantiation, training and evaluation

In [5]:
def main(unit=50, batchsize=100, epoch=5, gpu=-1, out='result/1_minimum'):
    """Train an MLP classifier on MNIST and print per-epoch train/test metrics.

    Calling ``main()`` without arguments uses the configuration that was
    previously hard-coded in this cell.

    Args:
        unit: Number of hidden layer units. Try increasing this value and
            see how the accuracy changes.
        batchsize: Minibatch size, used for both training and evaluation.
        epoch: Number of training epochs.
        gpu: GPU ID to be used for calculation. A negative value means
            that only the CPU is used.
        out: Directory name for the results. It is only printed; nothing
            in this function writes to it.
    """
    print('GPU: {}'.format(gpu))
    print('# unit: {}'.format(unit))
    print('# Minibatch-size: {}'.format(batchsize))
    print('# epoch: {}'.format(epoch))
    print('out directory: {}'.format(out))

    # Set up a neural network to train
    model = MLP(unit, 10)
    # Classifier will calculate classification loss, based on the output of model
    classifier_model = SoftmaxClassifier(model)
    if gpu >= 0:
        chainer.cuda.get_device(gpu).use()  # Make a specified GPU current
        classifier_model.to_gpu()           # Copy the model to the GPU
    xp = np if gpu < 0 else cuda.cupy
    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(classifier_model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    n_epoch = epoch
    N = len(train)      # training data size
    N_test = len(test)  # test data size

    # Learning loop. A separate loop variable is used so that the
    # configured number of epochs is not overwritten.
    for current_epoch in range(1, n_epoch + 1):
        print('epoch', current_epoch)

        # training
        perm = np.random.permutation(N)
        sum_accuracy = 0
        sum_loss = 0
        start = time.time()
        for i in six.moves.range(0, N, batchsize):
            # Fetch the minibatch once; element 0 holds the inputs and
            # element 1 the labels.
            batch = train[perm[i:i + batchsize]]
            x = chainer.Variable(xp.asarray(batch[0]))
            t = chainer.Variable(xp.asarray(batch[1]))

            # Pass the loss function (Classifier defines it) and its arguments
            optimizer.update(classifier_model, x, t)

            # Weight by the real batch length so a short last batch is
            # averaged correctly.
            n_samples = len(t.data)
            sum_loss += float(classifier_model.loss.data) * n_samples
            sum_accuracy += float(classifier_model.accuracy.data) * n_samples
        end = time.time()
        elapsed_time = end - start
        throughput = N / elapsed_time
        print('train mean loss={}, accuracy={}, throughput={} images/sec'.format(
            sum_loss / N, sum_accuracy / N, throughput))

        # evaluation
        sum_accuracy = 0
        sum_loss = 0
        for i in six.moves.range(0, N_test, batchsize):
            # Clamp the upper bound so the last batch never indexes past
            # the end of the test set when batchsize does not divide N_test.
            index = np.arange(i, min(i + batchsize, N_test))
            batch = test[index]
            x = chainer.Variable(xp.asarray(batch[0]))
            t = chainer.Variable(xp.asarray(batch[1]))

            loss = classifier_model(x, t)
            n_samples = len(t.data)
            sum_loss += float(loss.data) * n_samples
            sum_accuracy += float(classifier_model.accuracy.data) * n_samples

        print('test  mean loss={}, accuracy={}'.format(
            sum_loss / N_test, sum_accuracy / N_test))

if __name__ == '__main__':
    main()
 

Downloading from http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz...


GPU: -1
# unit: 50
# Minibatch-size: 100
# epoch: 5
out directory: result/1_minimum


Downloading from http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz...
Downloading from http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz...
Downloading from http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz...


epoch 1
train mean loss=0.4122396842390299, accuracy=0.8847666671623786, throughput=23543.566333568622 images/sec
test  mean loss=0.21441456601954997, accuracy=0.9381000036001206
epoch 2
train mean loss=0.18375116545086106, accuracy=0.9464166672031085, throughput=26435.9914369731 images/sec
test  mean loss=0.16052430846495555, accuracy=0.9517000043392181
epoch 3
train mean loss=0.1423880790049831, accuracy=0.9576166693369548, throughput=23999.558266187156 images/sec
test  mean loss=0.13983906885492614, accuracy=0.9586000061035156
epoch 4
train mean loss=0.11849292076192797, accuracy=0.9648166718085607, throughput=25041.475544886755 images/sec
test  mean loss=0.12401987234363332, accuracy=0.963400005698204
epoch 5
train mean loss=0.10147439869431157, accuracy=0.9689500072598457, throughput=24344.289436530136 images/sec
test  mean loss=0.11619650734704919, accuracy=0.9654000067710876


## Changing the number of hidden units from 50 to 100

In [None]:
def main(unit=100, batchsize=100, epoch=5, gpu=-1, out='result/1_minimum'):
    """Train an MLP classifier on MNIST and print per-epoch train/test metrics.

    Calling ``main()`` without arguments uses the configuration that was
    previously hard-coded in this cell (100 hidden units).

    Args:
        unit: Number of hidden layer units. Try increasing this value and
            see how the accuracy changes.
        batchsize: Minibatch size, used for both training and evaluation.
        epoch: Number of training epochs.
        gpu: GPU ID to be used for calculation. A negative value means
            that only the CPU is used.
        out: Directory name for the results. It is only printed; nothing
            in this function writes to it.
    """
    print('GPU: {}'.format(gpu))
    print('# unit: {}'.format(unit))
    print('# Minibatch-size: {}'.format(batchsize))
    print('# epoch: {}'.format(epoch))
    print('out directory: {}'.format(out))

    # Set up a neural network to train
    model = MLP(unit, 10)
    # Classifier will calculate classification loss, based on the output of model
    classifier_model = SoftmaxClassifier(model)
    if gpu >= 0:
        chainer.cuda.get_device(gpu).use()  # Make a specified GPU current
        classifier_model.to_gpu()           # Copy the model to the GPU
    xp = np if gpu < 0 else cuda.cupy
    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(classifier_model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    n_epoch = epoch
    N = len(train)      # training data size
    N_test = len(test)  # test data size

    # Learning loop. A separate loop variable is used so that the
    # configured number of epochs is not overwritten.
    for current_epoch in range(1, n_epoch + 1):
        print('epoch', current_epoch)

        # training
        perm = np.random.permutation(N)
        sum_accuracy = 0
        sum_loss = 0
        start = time.time()
        for i in six.moves.range(0, N, batchsize):
            # Fetch the minibatch once; element 0 holds the inputs and
            # element 1 the labels.
            batch = train[perm[i:i + batchsize]]
            x = chainer.Variable(xp.asarray(batch[0]))
            t = chainer.Variable(xp.asarray(batch[1]))

            # Pass the loss function (Classifier defines it) and its arguments
            optimizer.update(classifier_model, x, t)

            # Weight by the real batch length so a short last batch is
            # averaged correctly.
            n_samples = len(t.data)
            sum_loss += float(classifier_model.loss.data) * n_samples
            sum_accuracy += float(classifier_model.accuracy.data) * n_samples
        end = time.time()
        elapsed_time = end - start
        throughput = N / elapsed_time
        print('train mean loss={}, accuracy={}, throughput={} images/sec'.format(
            sum_loss / N, sum_accuracy / N, throughput))

        # evaluation
        sum_accuracy = 0
        sum_loss = 0
        for i in six.moves.range(0, N_test, batchsize):
            # Clamp the upper bound so the last batch never indexes past
            # the end of the test set when batchsize does not divide N_test.
            index = np.arange(i, min(i + batchsize, N_test))
            batch = test[index]
            x = chainer.Variable(xp.asarray(batch[0]))
            t = chainer.Variable(xp.asarray(batch[1]))

            loss = classifier_model(x, t)
            n_samples = len(t.data)
            sum_loss += float(loss.data) * n_samples
            sum_accuracy += float(classifier_model.accuracy.data) * n_samples

        print('test  mean loss={}, accuracy={}'.format(
            sum_loss / N_test, sum_accuracy / N_test))

if __name__ == '__main__':
    main()
 

## Changing the number of hidden units from 100 to 500

In [6]:
def main(unit=500, batchsize=100, epoch=5, gpu=-1, out='result/1_minimum'):
    """Train an MLP classifier on MNIST and print per-epoch train/test metrics.

    Calling ``main()`` without arguments uses the configuration that was
    previously hard-coded in this cell (500 hidden units).

    Args:
        unit: Number of hidden layer units. Try increasing this value and
            see how the accuracy changes.
        batchsize: Minibatch size, used for both training and evaluation.
        epoch: Number of training epochs.
        gpu: GPU ID to be used for calculation. A negative value means
            that only the CPU is used.
        out: Directory name for the results. It is only printed; nothing
            in this function writes to it.
    """
    print('GPU: {}'.format(gpu))
    print('# unit: {}'.format(unit))
    print('# Minibatch-size: {}'.format(batchsize))
    print('# epoch: {}'.format(epoch))
    print('out directory: {}'.format(out))

    # Set up a neural network to train
    model = MLP(unit, 10)
    # Classifier will calculate classification loss, based on the output of model
    classifier_model = SoftmaxClassifier(model)
    if gpu >= 0:
        chainer.cuda.get_device(gpu).use()  # Make a specified GPU current
        classifier_model.to_gpu()           # Copy the model to the GPU
    xp = np if gpu < 0 else cuda.cupy
    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(classifier_model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    n_epoch = epoch
    N = len(train)      # training data size
    N_test = len(test)  # test data size

    # Learning loop. A separate loop variable is used so that the
    # configured number of epochs is not overwritten.
    for current_epoch in range(1, n_epoch + 1):
        print('epoch', current_epoch)

        # training
        perm = np.random.permutation(N)
        sum_accuracy = 0
        sum_loss = 0
        start = time.time()
        for i in six.moves.range(0, N, batchsize):
            # Fetch the minibatch once; element 0 holds the inputs and
            # element 1 the labels.
            batch = train[perm[i:i + batchsize]]
            x = chainer.Variable(xp.asarray(batch[0]))
            t = chainer.Variable(xp.asarray(batch[1]))

            # Pass the loss function (Classifier defines it) and its arguments
            optimizer.update(classifier_model, x, t)

            # Weight by the real batch length so a short last batch is
            # averaged correctly.
            n_samples = len(t.data)
            sum_loss += float(classifier_model.loss.data) * n_samples
            sum_accuracy += float(classifier_model.accuracy.data) * n_samples
        end = time.time()
        elapsed_time = end - start
        throughput = N / elapsed_time
        print('train mean loss={}, accuracy={}, throughput={} images/sec'.format(
            sum_loss / N, sum_accuracy / N, throughput))

        # evaluation
        sum_accuracy = 0
        sum_loss = 0
        for i in six.moves.range(0, N_test, batchsize):
            # Clamp the upper bound so the last batch never indexes past
            # the end of the test set when batchsize does not divide N_test.
            index = np.arange(i, min(i + batchsize, N_test))
            batch = test[index]
            x = chainer.Variable(xp.asarray(batch[0]))
            t = chainer.Variable(xp.asarray(batch[1]))

            loss = classifier_model(x, t)
            n_samples = len(t.data)
            sum_loss += float(loss.data) * n_samples
            sum_accuracy += float(classifier_model.accuracy.data) * n_samples

        print('test  mean loss={}, accuracy={}'.format(
            sum_loss / N_test, sum_accuracy / N_test))

if __name__ == '__main__':
    main()
 

GPU: -1
# unit: 500
# Minibatch-size: 100
# epoch: 5
out directory: result/1_minimum
epoch 1
train mean loss=0.22122278173143664, accuracy=0.9345166688164075, throughput=769.0304063994142 images/sec
test  mean loss=0.11015882198233157, accuracy=0.9680000030994416
epoch 2
train mean loss=0.08005895735773569, accuracy=0.9748333435257276, throughput=567.8751289323735 images/sec
test  mean loss=0.08384176248218864, accuracy=0.973300005197525
epoch 3
train mean loss=0.05355364949287226, accuracy=0.9828666772445043, throughput=552.7098209462644 images/sec
test  mean loss=0.07390523866924922, accuracy=0.977000008225441
epoch 4
train mean loss=0.0374969503415438, accuracy=0.9879000095526377, throughput=545.403833217965 images/sec
test  mean loss=0.07387393044031342, accuracy=0.9790000063180924
epoch 5
train mean loss=0.028722861313047663, accuracy=0.9907166745265324, throughput=469.24537218152307 images/sec
test  mean loss=0.07021083826381073, accuracy=0.9794000071287156
