In [1]:
# ResNet class

import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizers
from chainer import Variable
from chainer import cuda

import numpy as np

class ResBlock(chainer.Chain):
    def __init__(self, n_in, n_out, stride=1, ksize=1):
        super(ResBlock, self).__init__(
            conv1 = L.Convolution2D(n_in, n_out, 3, stride, 1),
            bn1 = L.BatchNormalization(n_out),
            conv2 = L.Convolution2D(n_out, n_out, 3, 1, 1),
            bn2 = L.BatchNormalization(n_out),
        )
        
    def __call__(self, x):
        h1 = F.relu(self.bn1(self.conv1(x)))
        h2 = self.bn2(self.conv2(h1))
        return F.relu(h2 + x)

class ResNet(chainer.Chain):
    def __init__(self, block_class, n=10):
        super(ResNet, self).__init__()
        with self.init_scope():
            self.conv1 = L.Convolution2D(None, n, 5, 1)
            self.bn1 = L.BatchNormalization(n)
            self.bc1 = block_class(n, n)
            self.l1 = L.Linear(None, 10)
            
    def __call__(self, x):
        h1 = self.bn1(self.conv1(x))
        h2 = self.bc1(h1)
        h3 = self.l1(F.average_pooling_2d(h2, 28))
        return h3

In [2]:
# MNIST data set

train, test = chainer.datasets.get_mnist()
_xs, ts = train._datasets
_txs, tts = test._datasets

size = 10000
_xs = _xs[:size]
ts = ts[:size]
_txs = _txs[:size]
tts = tts[:size]

In [3]:
# padding (60000, 784) -> (60000, 1, 28, 28) -> (60000, 1, 32, 32)

_v0 = np.row_stack((np.zeros(28), np.zeros(28)))
v0 = np.array(_v0)
_h0 = np.column_stack((np.zeros(32), np.zeros(32)))
h0 = np.array(_h0)

def padding(x):
    tmp1 = np.vstack((x, v0))
    tmp2 = np.vstack((v0, tmp1))
    _tmp1 = np.hstack((tmp2, h0))
    _tmp2 = np.hstack((h0, _tmp1))
    return _tmp2

xs_list = []
for i in range(len(_xs)):
    x = np.reshape(_xs[i], (28, 28))
    pad_x = padding(x)
    xs_list.append(pad_x[np.newaxis, :, :])
txs_list = []
for i in range(len(_txs)):
    tx = np.reshape(_txs[i], (28, 28))
    pad_tx = padding(tx)
    txs_list.append(pad_tx[np.newaxis, :, :])
    
xs = np.array(xs_list, dtype=np.float32)
txs = np.array(txs_list, dtype=np.float32)

In [4]:
# method

def check_accuracy(model, xs, ts, batchsize):
    loss = 0
    num_cors = 0
    for i in range(0, len(xs), batchsize):
        x = xs[i:i + batchsize]
        t = ts[i:i + batchsize]
        
        #var_xs = Variable(cuda.to_gpu(x))
        var_xs = Variable(x)
        #t = Variable(cuda.to_gpu(np.array(t, "i")))
        t = Variable(np.array(t, "i"))
        ys = model(var_xs)
    
        loss += F.softmax_cross_entropy(ys, t)
        ys = np.argmax(ys.data, axis=1)
        #_t = cuda.to_gpu(np.array(cuda.to_cpu(t.data), dtype=np.float32))
        _t = np.array(t.data, dtype=np.float32)
        cors = (ys == _t)
        num_cors += sum(cors)
    accuracy = num_cors / ts.shape[0]
    return accuracy, loss

In [5]:
# learn

model = ResNet(ResBlock)
optimizer = optimizers.MomentumSGD()
optimizer.setup(model)

batchsize = 100
datasize = len(xs)

# use GPU
#chainer.cuda.get_device_from_id(0).use()
#model.to_gpu()

#xp = cuda.cupy

for epoch in range(20):
    for i in range(0, datasize, batchsize):
        x = xs[i:i + batchsize]
        t = ts[i:i + batchsize]
        
        #var_x = Variable(cuda.to_gpu(x))
        var_x = Variable(x)
        #t = Variable(cuda.to_gpu(np.array(t, "i")))
        t = Variable(np.array(t, "i"))
        y = model(var_x)
        
        model.cleargrads()
        loss = F.softmax_cross_entropy(y, t)
        loss.backward()
        optimizer.update()
    accuracy_train, loss_train = check_accuracy(model, xs, ts, batchsize)
    accuracy_test, _           = check_accuracy(model, txs, tts, batchsize)
    
    optimizer.new_epoch()
    
    print("Epoch {0} loss(train) = {1}, accuracy(train) = {2}, accuracy(test) = {3}".format(epoch + 1, loss_train, accuracy_train, accuracy_test))

Epoch 1 loss(train) = variable(219.75909423828125), accuracy(train) = 0.2522, accuracy(test) = 0.2618
Epoch 2 loss(train) = variable(203.0090789794922), accuracy(train) = 0.3154, accuracy(test) = 0.3138
Epoch 3 loss(train) = variable(183.86343383789062), accuracy(train) = 0.3829, accuracy(test) = 0.386
Epoch 4 loss(train) = variable(162.5158233642578), accuracy(train) = 0.4525, accuracy(test) = 0.4597
Epoch 5 loss(train) = variable(142.8485565185547), accuracy(train) = 0.5478, accuracy(test) = 0.56
Epoch 6 loss(train) = variable(123.6399917602539), accuracy(train) = 0.6452, accuracy(test) = 0.6553
Epoch 7 loss(train) = variable(107.07052612304688), accuracy(train) = 0.6996, accuracy(test) = 0.709
Epoch 8 loss(train) = variable(95.35781860351562), accuracy(train) = 0.7382, accuracy(test) = 0.751
Epoch 9 loss(train) = variable(83.79021453857422), accuracy(train) = 0.777, accuracy(test) = 0.7873
Epoch 10 loss(train) = variable(76.54161071777344), accuracy(train) = 0.7945, accuracy(test) =