In [1]:
import numpy as np
import chainer
from chainer.backends import cuda
from chainer import Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions

### Simple example

In [18]:
x_data = np.array([5], dtype=np.float32)

In [19]:
x_data

array([5.], dtype=float32)

In [20]:
x = Variable(x_data)

In [21]:
x

variable([5.])

In [22]:
y = x*x

In [23]:
y = x**2 - 2 * x + 1

In [26]:
type(y.data)

numpy.ndarray

In [27]:
y.backward()

In [29]:
x.grad

array([8.], dtype=float32)

### Multidimensional output

In [34]:
x = Variable(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32))

In [35]:
y = x**2 - 2 * x + 1

In [39]:
# y is a 2x3 array rather than a scalar, so set an explicit initial gradient
# of the same shape (all ones here) before calling backward().
y.grad = np.ones((2, 3), dtype=np.float32)

In [40]:
y.backward()

In [41]:
x.grad

array([[ 0.,  2.,  4.],
       [ 6.,  8., 10.]], dtype=float32)

### Links

In [42]:
f = L.Linear(3,2)

In [46]:
f.W.data

array([[-1.3497604 , -0.19706826,  0.84678787],
       [ 0.07082687, -0.14718227,  0.3387233 ]], dtype=float32)

In [47]:
f.b.data

array([0., 0.], dtype=float32)

In [48]:
x = Variable(np.array([[1, 2, 3], [1, 2, 3]], dtype=np.float32))

In [55]:
y = f(x)

In [58]:
y.data

array([[0.7964666, 0.7926322],
       [0.7964666, 0.7926322]], dtype=float32)

In [59]:
f.cleargrads()

In [62]:
y.grad = np.ones((2, 2), dtype=np.float32)

In [63]:
y.backward()

In [64]:
f.W.grad

array([[2., 4., 6.],
       [2., 4., 6.]], dtype=float32)

In [50]:
f(x).data

array([[0.7964666, 0.7926322],
       [0.7964666, 0.7926322]], dtype=float32)

In [52]:
f.grad

AttributeError: 'Linear' object has no attribute 'grad'

### Define my own function

In [66]:
class MulAdd(Function):
    """Elementwise multiply-add function: ``w = x * y + z``.

    Written against the ``Function`` interface with explicit CPU forward and
    backward implementations that take and return tuples of arrays.
    """

    def forward_cpu(self, inputs):
        """Compute ``x * y + z``.

        Args:
            inputs: Tuple ``(x, y, z)`` of input arrays.

        Returns:
            One-element tuple containing ``x * y + z``.
        """
        x, y, z = inputs
        # The addend must be z (not x): backward_cpu returns gz = gw, which is
        # the gradient of x * y + z with respect to z.
        return (x * y + z,)

    def backward_cpu(self, inputs, grad_outputs):
        """Propagate the output gradient back to ``x``, ``y`` and ``z``.

        Args:
            inputs: Tuple ``(x, y, z)`` of the forward inputs.
            grad_outputs: One-element tuple holding the gradient ``gw`` with
                respect to the output.

        Returns:
            Tuple ``(gx, gy, gz)`` = ``(gw * y, gw * x, gw)``.
        """
        x, y, _ = inputs  # z does not appear in any of the gradients
        gw, = grad_outputs

        gx = gw * y  # d(x*y + z)/dx = y
        gy = gw * x  # d(x*y + z)/dy = x
        gz = gw      # d(x*y + z)/dz = 1

        return gx, gy, gz

In [67]:
# Draw three independent (3, 2) float32 inputs, uniform on [-1, 1), in x, y, z order.
x, y, z = (
    Variable(np.random.uniform(-1, 1, (3, 2)).astype(np.float32))
    for _ in range(3)
)

In [68]:
w = MulAdd()(x, y, z)

In [72]:
def muladd(x, y, z):
    """Functional wrapper: apply a fresh ``MulAdd`` instance to ``x``, ``y`` and ``z``."""
    op = MulAdd()
    return op(x, y, z)

In [73]:
w = muladd(x, y, z)

### Multilayer perceptron

In [2]:
l1 = L.Linear(4, 3)
l2 = L.Linear(3, 4)

In [3]:
def my_forward(x):
    """Pass ``x`` through the module-level links ``l1`` then ``l2`` and return the result."""
    return l2(l1(x))

In [4]:
class MyProc(object):
    """Two linear layers (4 -> 3 -> 2) held in a plain Python object.

    The links are stored as ordinary attributes ``l1`` and ``l2``; this class
    does not derive from ``Chain``.
    """

    def __init__(self):
        """Create the two linear links."""
        self.l1 = L.Linear(4, 3)
        self.l2 = L.Linear(3, 2)

    def forward(self, x):
        """Apply ``l1`` and then ``l2`` to ``x``.

        Args:
            x: Input accepted by ``self.l1``.

        Returns:
            The value returned by ``self.l2`` for the hidden activation.
        """
        hidden = self.l1(x)
        # Feed the hidden activation computed above; the previous code passed
        # the undefined name ``h`` and raised NameError.
        output = self.l2(hidden)
        return output

In [5]:
class MyChain(Chain):
    """Chain of two linear links: 4 -> 3 -> 2."""

    def __init__(self):
        """Initialise the base ``Chain`` and create the two links."""
        super(MyChain, self).__init__()
        # Both links are created inside the chain's init_scope() context.
        with self.init_scope():
            self.l1 = L.Linear(4, 3)
            self.l2 = L.Linear(3, 2)

    def __call__(self, x):
        """Return ``l2(l1(x))``."""
        return self.l2(self.l1(x))

### Optimizer

In [6]:
model = MyChain()

In [7]:
optimizer = optimizers.SGD().setup(model)

In [8]:
optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(0.0005))

In [9]:
x = np.random.uniform(-1, 1, (2, 4)).astype(np.float32)

In [10]:
model.cleargrads()

In [11]:
loss = F.sum(model(chainer.Variable(x)))

In [13]:
loss.backward()

In [14]:
optimizer.update()

### Passing a loss function to the update method

In [15]:
arg1 = np.random.uniform(-1, 1, (2,4)).astype(np.float32)

In [16]:
arg2 = np.random.uniform(-1, 1, (2, 4)).astype(np.float32)

In [23]:
arg1-arg2

array([[-1.2828526 , -0.10551794, -0.04234138,  1.5107669 ],
       [ 1.2658169 ,  0.21499115,  0.49786523,  1.5124439 ]],
      dtype=float32)

In [20]:
def lossfun(arg1, arg2):
    """Return the sum of the global ``model``'s output evaluated on ``arg1 - arg2``."""
    difference = arg1 - arg2
    return F.sum(model(difference))

In [22]:
lossfun(arg1, arg2)

variable(1.5362668)

In [25]:
optimizer.update(lossfun, chainer.Variable(arg1), chainer.Variable(arg2))

### Trainer extension

In [26]:
def lr_drop(trainer):
    """Multiply the learning rate of the trainer's 'main' optimizer by 0.1, in place."""
    main_optimizer = trainer.updater.get_optimizer('main')
    main_optimizer.lr *= 0.1

In [28]:
trainer

NameError: name 'trainer' is not defined

In [27]:
trainer.extend(lr_drop, trigger=(10, 'epoch'))

NameError: name 'trainer' is not defined

In [2]:
from chainer.datasets import mnist

In [3]:
train, test = mnist.get_mnist()

In [15]:
# Print the label (second element) of each of the first 50 training examples.
for example_index in range(50):
    label = train[example_index][1]
    print(label)

5
0
4
1
9
2
1
3
1
4
3
5
3
6
1
7
2
8
6
9
4
0
9
1
1
2
4
3
2
7
3
8
6
9
0
5
6
0
7
6
1
8
7
9
3
9
8
5
9
3


In [11]:
x = np.int32(5)

In [12]:
x

5

In [13]:
type(x)

numpy.int32

In [10]:
type(test[0][1])

numpy.int32

In [6]:
train[0][0].shape

(784,)

In [5]:
type(train)

chainer.datasets.tuple_dataset.TupleDataset

In [9]:
train[0][0].shape

(784,)

In [31]:
type(train)

chainer.datasets.tuple_dataset.TupleDataset

In [33]:
# Mini-batch size shared by the training and test iterators.
batchsize = 128
train_iter = iterators.SerialIterator(train, batchsize)
# The two flags are spelled out by name: no repeating and no shuffling for the test set.
test_iter = iterators.SerialIterator(
    test,
    batchsize,
    repeat=False,
    shuffle=False,
)

### Define the model

In [46]:
class MLP(Chain):
    """Three-layer perceptron: two ReLU-activated hidden layers and a linear output layer.

    Args:
        n_mid_units: Output size of each of the two hidden linear links.
        n_out: Output size of the final linear link.
    """

    def __init__(self, n_mid_units=100, n_out=10):
        super(MLP, self).__init__()
        with self.init_scope():
            # The input size of every link is passed as None, i.e. left
            # unspecified here (see the L.Linear documentation for how it is
            # then determined).
            self.l1 = L.Linear(None, n_mid_units)
            self.l2 = L.Linear(None, n_mid_units)
            self.l3 = L.Linear(None, n_out)

    def __call__(self, x):
        """Return the output of ``l3`` after ReLU(l1) and ReLU(l2) have been applied to ``x``."""
        hidden = x
        for layer in (self.l1, self.l2):
            hidden = F.relu(layer(hidden))
        return self.l3(hidden)

In [47]:
gpu_id = -1

In [60]:
model = MLP()

### Setup updater

In [61]:
max_epoch = 10
model = L.Classifier(model)
optimizer = optimizers.MomentumSGD()

In [62]:
optimizer.setup(model)

<chainer.optimizers.momentum_sgd.MomentumSGD at 0x125116908>

In [63]:
# Use the notebook's gpu_id setting instead of repeating the literal -1, so
# that changing gpu_id actually changes the device used by the updater.
updater = training.updaters.StandardUpdater(train_iter, optimizer, device=gpu_id)

### Setup a trainer

In [64]:
# Write trainer output to a directory relative to the working directory; a
# user-specific absolute path only works on the original author's machine.
trainer = training.Trainer(updater, (max_epoch, 'epoch'), out='out')

In [65]:
# Evaluate the classifier on the test iterator so that the
# 'validation/main/*' entries requested by PrintReport below are actually
# reported; without an Evaluator those columns stay empty.
trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport([
    'epoch',
    'main/loss',
    'main/accuracy',
    'validation/main/loss',
    'validation/main/accuracy',
    'elapsed_time',
]))

In [66]:
trainer.run()

In [67]:
model.reset_state()

AttributeError: 'Classifier' object has no attribute 'reset_state'