In [1]:
import numpy as np
import chainer
from chainer import cuda, Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions

In [2]:
# Variable: wrap a NumPy array (here a one-element float32 array holding 5) in a chainer Variable.
x_data = np.array([5], dtype=np.float32)
x = Variable(x_data)

In [3]:
# Variable objects support the basic arithmetic operators, so expressions can be
# written directly on them. Here y = x**2 - 2*x + 1, i.e. (x - 1)**2.
y = x**2 - 2 * x + 1

In [4]:
# y is also a Variable object; its value can be extracted by accessing the data attribute.
# With x = 5 the value is 5**2 - 2*5 + 1 = 16.
y.data

array([ 16.], dtype=float32)

In [5]:
# y holds not only the result value but also the computational graph that produced it,
# which makes it possible to differentiate y. This is done by calling its backward() method.

# backward() runs backpropagation: the gradient is computed and stored in the grad attribute of the input variable x.

In [6]:
# Backpropagate from y. Since y = x**2 - 2*x + 1, dy/dx = 2*x - 2, which is 8 at x = 5.
y.backward()
x.grad

array([ 8.], dtype=float32)

In [7]:
# By default, Chainer releases the gradient arrays of intermediate variables for memory efficiency.
# To preserve that gradient information, pass retain_grad=True to the backward() method.

In [8]:
# Give the intermediate term 2*x its own name so that its gradient can be inspected.
z = 2*x
y = x**2 - z + 1
# retain_grad=True keeps the gradients of intermediate variables such as z.
# NOTE(review): x already received a gradient from the earlier backward() call and
# Chainer appears to accumulate gradients on leaf variables, so x.grad after this
# cell is probably not just this graph's gradient -- confirm, and call
# x.cleargrad() first if x.grad is going to be read.
y.backward(retain_grad=True)
# dy/dz = -1 because z enters y with a minus sign.
z.grad

array([-1.], dtype=float32)

In [9]:
# Without retain_grad=True, the gradient of the intermediate variable z is released
# after backprop, so z.grad is None.
y.backward()  # The default value of retain_grad is False
z.grad is None

True

In [10]:
# Multi-element array input: y is a (2, 3) array rather than a single value, so the
# initial error y.grad is set by hand before backward() is called.
x = Variable(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32))
y = x**2 - 2*x + 1
# All-ones initial error with the same shape and dtype as y.
y.grad = np.ones_like(y.data)
y.backward()
# Element-wise dy/dx = 2*x - 2.
x.grad

array([[  0.,   2.,   4.],
       [  6.,   8.,  10.]], dtype=float32)

In [13]:
# Links: L.Linear(3, 2) is a fully-connected layer taking 3 inputs and producing 2 outputs.
# Its weight matrix W has shape (2, 3).
# NOTE(review): no random seed is set in this notebook, so the W values shown below
# are presumably different on every run -- confirm.
f = L.Linear(3, 2)
f.W.data

array([[-0.515405  , -0.34530032,  0.50868386],
       [ 0.90182877,  0.39506868, -0.74484712]], dtype=float32)

In [14]:
# The bias vector b has one entry per output unit; here it is all zeros.
f.b.data

array([ 0.,  0.], dtype=float32)

In [15]:
# Apply the link to a mini-batch of two 3-dimensional rows. The result has shape (2, 2)
# and equals x.dot(W.T) + b for the W and b shown above.
x = Variable(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32))
y = f(x)
y.data

array([[ 0.32004601, -0.5425753 ],
       [-0.73601854,  1.11357558]], dtype=float32)

In [17]:
# Writing a model as a chain
class MyChain(Chain):
    """Two fully-connected layers applied back to back: 4 -> 3 -> 2 units.

    No activation function is applied between the layers.
    """

    def __init__(self):
        super(MyChain, self).__init__()
        # Child links are created inside init_scope() so that the chain
        # registers them (and their parameters).
        with self.init_scope():
            self.l1 = L.Linear(4, 3)
            self.l2 = L.Linear(3, 2)

    def __call__(self, x):
        """Feed x through l1 and then l2, returning the output of l2."""
        return self.l2(self.l1(x))

In [18]:
# Optimizer: build the model, create an SGD optimizer with its default settings,
# and hand the model to the optimizer via setup().
model = MyChain()
optimizer = optimizers.SGD()
optimizer.setup(model)