In [1]:
import numpy as np
import chainer
from chainer import cuda, Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions

## Forward/Backward Computation

In [2]:
# Wrap a one-element float32 array (value 5) in a chainer Variable.
x_data = np.array([5], dtype=np.float32)
x = Variable(x_data)

$y = x^2 - 2x + 1$

In [3]:
# Build y = x^2 - 2x + 1 from the Variable x.
y = x**2 - 2 * x + 1

In [4]:
# Display the array held by y (16 for x = 5).
y.data

array([ 16.], dtype=float32)

In [5]:
# Backpropagate from y; x.grad is read in the following cell.
y.backward()

In [6]:
# Gradient of y with respect to x: 2x - 2, i.e. 8 at x = 5.
x.grad

array([ 8.], dtype=float32)

In [7]:
# Route the 2x term through an intermediate Variable z so that its gradient
# can be inspected separately.
z = 2*x
y = x**2 - z + 1
# retain_grad=True is passed so that z.grad is still available after the
# backward pass (see Chainer's Variable.backward documentation).
y.backward(retain_grad=True)
z.grad

array([-1.], dtype=float32)

In [8]:
# Same polynomial on a 2x3 input. y is not a scalar here, so an initial
# gradient of ones with y's shape is assigned to y.grad before backward().
x = Variable(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32))
y = x**2 - 2*x + 1
y.grad = np.ones((2, 3), dtype=np.float32)
y.backward()
# Element-wise 2x - 2.
x.grad

array([[  0.,   2.,   4.],
       [  6.,   8.,  10.]], dtype=float32)

## Links

In [9]:
# Linear link constructed with (3, 2); the cells below show that its W has
# shape (2, 3) and its b has length 2.
f = L.Linear(3, 2)

In [10]:
# Parameter W of the link (a 2x3 float32 array).
f.W.data

array([[ 0.31360427,  0.06315196, -0.54749209],
       [-0.23545748,  0.76260567, -0.38407555]], dtype=float32)

In [11]:
# Parameter b of the link (all zeros at this point).
f.b.data

array([ 0.,  0.], dtype=float32)

In [12]:
# Apply the link to a batch of two 3-element rows; the result has shape (2, 2).
x = Variable(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32))
y = f(x)
y.data

array([[-1.20256805,  0.13752723],
       [-1.7147758 ,  0.56674504]], dtype=float32)

In [13]:
# Clear the link's parameter gradients before running backward.
f.cleargrads()

In [14]:
# Seed y.grad with ones, backpropagate, and inspect the gradient on W.
# Each row equals the column sums of x: [1+4, 2+5, 3+6].
y.grad = np.ones((2, 2), dtype=np.float32)
y.backward()
f.W.grad

array([[ 5.,  7.,  9.],
       [ 5.,  7.,  9.]], dtype=float32)

In [15]:
# Gradient on b from the same backward pass (2 per output: one per batch row).
f.b.grad

array([ 2.,  2.], dtype=float32)

## Write a model as a chain

In [16]:
# Two module-level linear links: 4 -> 3 and 3 -> 2.
l1 = L.Linear(4, 3)
l2 = L.Linear(3, 2)


def my_forward(x):
    """Apply the module-level links ``l1`` and then ``l2`` to ``x``."""
    return l2(l1(x))

In [17]:
class MyProc(object):
    """Plain Python object holding two linear links applied in sequence."""

    def __init__(self):
        # Constructed in the same order as before: l1 first, then l2.
        self.l1, self.l2 = L.Linear(4, 3), L.Linear(3, 2)

    def forward(self, x):
        """Return the result of applying ``l1`` and then ``l2`` to ``x``."""
        return self.l2(self.l1(x))

In [20]:
class MyChain(Chain):
    """Chain version of the two-layer model: Linear(4, 3) then Linear(3, 2).

    The links are registered through ``Chain.__init__`` keyword arguments and
    are reachable as ``self.l1`` and ``self.l2``.
    """

    def __init__(self):
        super(MyChain, self).__init__(
            l1 = L.Linear(4, 3),
            # Output size is 2, matching my_forward, MyProc and MyChain2
            # (this was previously 3, which made the variants disagree).
            l2 = L.Linear(3, 2),
        )

    def __call__(self, x):
        """Apply ``l1`` and then ``l2`` to ``x`` and return the result."""
        h = self.l1(x)
        return self.l2(h)

In [22]:
class MyChain2(ChainList):
    """ChainList variant of the two-layer model; links are accessed by index."""

    def __init__(self):
        layers = [L.Linear(4, 3), L.Linear(3, 2)]
        super(MyChain2, self).__init__(*layers)

    def __call__(self, x):
        """Apply the link at index 0 and then the link at index 1 to ``x``."""
        first, second = self[0], self[1]
        return second(first(x))

## Optimizer

In [23]:
# Instantiate the chain and attach an SGD optimizer to it.
model = MyChain()
optimizer = optimizers.SGD()
# NOTE(review): use_cleargrads() looks tied to older Chainer releases —
# confirm it is still needed/available in the installed version.
optimizer.use_cleargrads()
optimizer.setup(model)

In [25]:
# Register a WeightDecay hook, constructed with 0.0005, on the optimizer.
optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

In [26]:
# Manual update step: draw a random (2, 4) float32 batch, clear gradients,
# compute a scalar loss with F.sum, backpropagate, then call update() with
# no arguments.
x = np.random.uniform(-1, 1, (2, 4)).astype('f')
model.cleargrads()
loss = F.sum(model(chainer.Variable(x)))
loss.backward()
optimizer.update()

In [30]:
def lossfun(arg1, arg2):
    """Return ``F.sum`` of the model's output on ``arg1 - arg2``.

    Reads the notebook-level ``model``.
    """
    difference = arg1 - arg2
    return F.sum(model(difference))

In [32]:
# Alternative form: hand the loss function and its arguments to update().
# Presumably update() then evaluates lossfun and backpropagates itself —
# see Chainer's Optimizer.update documentation.
arg1 = np.random.uniform(-1, 1, (2, 4)).astype('f')
arg2 = np.random.uniform(-1, 1, (2, 4)).astype('f')
optimizer.update(lossfun, chainer.Variable(arg1), chainer.Variable(arg2))

## Example: multi-layer perceptron on MNIST

In [33]:
# Fetch MNIST as a (train, test) pair; this run downloaded the four archives
# listed in the output below.
train, test = datasets.get_mnist()

Downloading from http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz...
Downloading from http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz...
Downloading from http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz...
Downloading from http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz...


In [34]:
# Iterator over the training set: batches of 100 with shuffle=True.
train_iter = iterators.SerialIterator(train, batch_size=100, shuffle=True)

In [35]:
# Iterator over the test set: batches of 100 with repeat=False and shuffle=False.
test_iter = iterators.SerialIterator(test, batch_size=100, repeat=False, shuffle=False)

In [39]:
class MLP(Chain):
    """Three linear links with ReLU applied after the first two.

    Args:
        n_units: Output size passed to ``l1`` and ``l2``.
        n_out: Output size passed to ``l3``.

    Every link is built with ``None`` as its first argument.
    """

    def __init__(self, n_units, n_out):
        super(MLP, self).__init__(
            l1=L.Linear(None, n_units),
            l2=L.Linear(None, n_units),
            l3=L.Linear(None, n_out),
        )

    def __call__(self, x):
        """Return ``l3(relu(l2(relu(l1(x)))))``."""
        hidden = F.relu(self.l1(x))
        hidden = F.relu(self.l2(hidden))
        return self.l3(hidden)

In [40]:
class Classifier(Chain):
    """Wrap a predictor link and compute its softmax cross-entropy loss.

    Args:
        predictor: Link registered as ``self.predictor`` and called on ``x``.
    """

    def __init__(self, predictor):
        super(Classifier, self).__init__(predictor=predictor)

    def __call__(self, x, t):
        """Report loss and accuracy for ``(x, t)`` and return the loss."""
        prediction = self.predictor(x)
        loss = F.softmax_cross_entropy(prediction, t)
        # Loss is computed before accuracy, as in the original ordering.
        metrics = {'loss': loss, 'accuracy': F.accuracy(prediction, t)}
        report(metrics, self)
        return loss

In [55]:
# Uses chainer.links.Classifier (L.Classifier), not the Classifier class
# defined in this notebook. Rebinds `model` and `optimizer`, replacing the
# MyChain/SGD pair from the Optimizer section.
model = L.Classifier(MLP(100, 10))
optimizer = optimizers.SGD()
optimizer.setup(model)

In [56]:
# Build the updater from the training iterator and optimizer, then a Trainer
# configured with (20, 'epoch') that writes to the 'result' directory.
updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, (20, 'epoch'), out='result')

In [51]:
# Register an Evaluator extension that runs the model on test_iter.
trainer.extend(extensions.Evaluator(test_iter, model))

In [52]:
# Register the LogReport extension with its default settings.
trainer.extend(extensions.LogReport())

In [53]:
# Print the epoch number plus training and validation accuracy.
trainer.extend(extensions.PrintReport(['epoch', 'main/accuracy', 'validation/main/accuracy']))

In [54]:
# Add a progress bar, then start training.
trainer.extend(extensions.ProgressBar())
trainer.run()

[J