In [18]:
# ResNet class

import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizers
from chainer import Variable
from chainer import cuda

import math
import numpy as np

class ResBlock(chainer.Chain):
    def __init__(self, n_in, n_out, stride=1, ksize=1):
        w = math.sqrt(2)
        super(ResBlock, self).__init__(
            conv1 = L.Convolution2D(n_in, n_out, 3, stride, 1, w),
            bn1 = L.BatchNormalization(n_out),
            conv2 = L.Convolution2D(n_out, n_out, 3, 1, 1, w),
            bn2 = L.BatchNormalization(n_out),
        )
        
    def __call__(self, x, train):
        h1 = F.relu(self.bn1(self.conv1(x), test=not train))
        h2 = self.bn2(self.conv2(h1), test=not train)
        if x.data.shape != h2.data.shape:
            xp = cuda.get_array_module(x.data)
            n, c, hh, ww = x.data.shape
            pada_c = h2.data.shape[1] - c
            p = xp.zeros((n, pad_c, hh, ww), dtype=xp.float32)
            p = Variable(p, volatile=not train)
            x = F.concat((p, x))
            if x.data.shape[2:] != h2.data.shape[2:]:
                x = F.average_pooling_2d(x, 1, 2)
        return F.relu(h2 + x)

class ResNet(chainer.Chain):
    def __init__(self, block_class, n=18):
        super(ResNet, self).__init__()
        with self.init_scope():
            w = math.sqrt(2)
            links = [('conv1', L.Convolution2D(3, 16, 3, 1, 0, w))]
            links += [('bn1', L.BatchNormalization(16))]
            for i in range(n):
                links += [('res{}'.format(len(links)), block_class(16, 16))]
            for i in range(n):
                links += [('res{}'.format((len(links)), 
                                          block_class(32 if i > 0 else 16, 32,
                                                     1 if i > 0 else 2)))]
            for i in range(n):
                links += [('res{}'.format((len(links)), 
                                          block_class(64 if i > 0 else 32, 64,
                                                     1 if i > 0 else 2)))]
            links += [('_apool{}'.format(len(links)),
                      F.AveragePooling2D(6, 1, 0, False, True))]
            links += [('fc{}'.format(len(links)),
                      L.Linear(64, 10))]
            for link in links:
                if not link[0].startswith('_'):
                    self.add_link(*link)
            self.forward = links
            self.train = True
            
    def __call__(self, x):
        for name, f in self.forward:
            if 'res' in name:
                h = f(x, self.train)
            else:
                h = f(x)
        return h

In [14]:
# MNIST data set

train, test = chainer.datasets.get_mnist()
_xs, ts = train._datasets
_txs, tts = test._datasets

size = 10000
_xs = _xs[:size]
ts = ts[:size]
_txs = _txs[:size]
tts = tts[:size]

In [15]:
# padding (60000, 784) -> (60000, 1, 28, 28) -> (60000, 1, 32, 32)

_v0 = np.row_stack((np.zeros(28), np.zeros(28)))
v0 = np.array(_v0)
_h0 = np.column_stack((np.zeros(32), np.zeros(32)))
h0 = np.array(_h0)

def padding(x):
    tmp1 = np.vstack((x, v0))
    tmp2 = np.vstack((v0, tmp1))
    _tmp1 = np.hstack((tmp2, h0))
    _tmp2 = np.hstack((h0, _tmp1))
    return _tmp2

xs_list = []
for i in range(len(_xs)):
    x = np.reshape(_xs[i], (28, 28))
    pad_x = padding(x)
    xs_list.append(pad_x[np.newaxis, :, :])
txs_list = []
for i in range(len(_txs)):
    tx = np.reshape(_txs[i], (28, 28))
    pad_tx = padding(tx)
    txs_list.append(pad_tx[np.newaxis, :, :])
    
xs = np.array(xs_list, dtype=np.float32)
txs = np.array(txs_list, dtype=np.float32)

In [16]:
# method

def check_accuracy(model, xs, ts, batchsize):
    loss = 0
    num_cors = 0
    for i in range(0, len(xs), batchsize):
        x = xs[i:i + batchsize]
        t = ts[i:i + batchsize]
        
        #var_xs = Variable(cuda.to_gpu(x))
        var_xs = Variable(x)
        t = Variable(np.array(t, "i"))
        ys = model(var_xs)
    
        loss += F.softmax_cross_entropy(ys, t)
        ys = np.argmax(ys.data, axis=1)
        #_t = cuda.to_gpu(np.array(cuda.to_cpu(t.data), dtype=np.float32))
        _t = np.array(t.data, dtype=np.float32)
        cors = (ys == _t)
        num_cors += sum(cors)
    accuracy = num_cors / ts.shape[0]
    return accuracy, loss

In [19]:
# learn

model = ResNet(ResBlock)
optimizer = optimizers.MomentumSGD()
optimizer.setup(model)

batchsize = 100
datasize = len(xs)

# use GPU
chainer.cuda.get_device_from_id(0).use()
model.to_gpu()

xp = cuda.cupy

for epoch in range(20):
    for i in range(0, datasize, batchsize):
        x = xs[i:i + batchsize]
        t = ts[i:i + batchsize]
        
        var_x = Variable(cuda.to_gpu(x))
        #var_x = Variable(x)
        t = Variable(np.array(t, "i"))
        y = model(var_x)
        
        model.cleargrads()
        loss = F.softmax_cross_entropy(y, t)
        loss.backward()
        optimizer.update()
    accuracy_train, loss_train = check_accuracy(model, xs, ts, batchsize)
    accuracy_test, _           = check_accuracy(model, txs, tts, batchsize)
    
    optimizer.new_epoch()
    
    print("Epoch {0} loss(train) = {1}, accuracy(train) = {2}, accuracy(test) = {3}".format(epoch + 1, loss_train, accuracy_train, accuracy_test))

TypeError: __init__() takes from 2 to 5 positional arguments but 6 were given