# High-level Chainer Example

In [1]:
import os
os.environ['CHAINER_TYPE_CHECK'] = '0'
import sys
import numpy as np
import math
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizers
from chainer import cuda
import json
from common.params import *
from common.utils import *

In [2]:
# Performance knobs for the convolution layers: turn on Chainer's autotune
# option and cap the workspace size at 512 MiB.
chainer.global_config.autotune = True
cuda.set_max_workspace_size(512 * 1024 ** 2)

In [3]:
# Record the software stack and GPU this run was produced on, so the timing
# and accuracy figures below can be compared across machines.
print("OS: ", sys.platform)
print("Python: ", sys.version)
print("Chainer: ", chainer.__version__)
print("CuPy: ", chainer.cuda.cupy.__version__)
print("Numpy: ", np.__version__)
# get_gpu_name comes from the star import of common.utils.
print("GPU: ", get_gpu_name())

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Chainer:  3.1.0
CuPy:  2.1.0
Numpy:  1.13.3
GPU:  ['Tesla K80']


In [4]:
class SymbolModule(chainer.Chain):
    """Small CNN: two conv-conv-pool stages followed by two dense layers.

    Dimensioned for 3-channel 32x32 inputs: the 3x3 convolutions use pad=1 and
    keep the spatial size, so the two stride-2 poolings leave an 8x8 map with
    100 channels, which is the input size ``fc1`` is built for.

    Calling the model returns the raw ``fc2`` output (no softmax applied).
    """

    def __init__(self):
        super(SymbolModule, self).__init__()
        with self.init_scope():
            # Stage 1: 3 -> 50 -> 50 channels.
            self.conv1 = L.Convolution2D(3, 50, ksize=3, pad=1)
            self.conv2 = L.Convolution2D(50, 50, ksize=3, pad=1)
            # Stage 2: 50 -> 100 -> 100 channels.
            self.conv3 = L.Convolution2D(50, 100, ksize=3, pad=1)
            self.conv4 = L.Convolution2D(100, 100, ksize=3, pad=1)
            # Classifier head; 8x8 is the feature-map size after both poolings.
            self.fc1 = L.Linear(100 * 8 * 8, 512)
            self.fc2 = L.Linear(512, N_CLASSES)

    def __call__(self, x):
        """Run the forward pass on a batch ``x`` and return the class scores."""
        # Stage 1: conv -> relu -> conv -> relu -> 2x2 max-pool -> dropout.
        h = F.relu(self.conv1(x))
        h = F.relu(self.conv2(h))
        h = F.max_pooling_2d(h, ksize=2, stride=2)
        h = F.dropout(h, ratio=0.25)

        # Stage 2: same layout with the wider convolutions.
        h = F.relu(self.conv3(h))
        h = F.relu(self.conv4(h))
        h = F.max_pooling_2d(h, ksize=2, stride=2)
        h = F.dropout(h, ratio=0.25)

        # Dense head with heavier dropout before the output layer.
        h = F.relu(self.fc1(h))
        h = F.dropout(h, ratio=0.5)
        return self.fc2(h)

In [5]:
def init_model(m):
    """Build a momentum-SGD optimizer and bind it to the model ``m``.

    The learning rate and momentum are the module-level ``LR`` and ``MOMENTUM``
    constants. Returns the optimizer after ``setup(m)`` has been called on it.
    """
    sgd = optimizers.MomentumSGD(lr=LR, momentum=MOMENTUM)
    sgd.setup(m)
    return sgd

In [6]:
%%time
# Load the CIFAR train/test splits with channels first, i.e. image arrays
# shaped (N, C, H, W).
# (mnist_for_library(channel_first=True) is the alternative loader that was
# tried here.)
x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)

# Sanity-check shapes and dtypes of all four arrays before training.
splits = (x_train, x_test, y_train, y_test)
print(*(arr.shape for arr in splits))
print(*(arr.dtype for arr in splits))

Preparing train set...
Preparing test set...
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)
float32 float32 int32 int32
CPU times: user 885 ms, sys: 560 ms, total: 1.44 s
Wall time: 1.44 s


In [7]:
%%time
# Create symbol
sym = SymbolModule()
if GPU:
    # cuda.get_device() is deprecated since Chainer v2; get_device_from_id()
    # is its replacement for selecting a device by integer id.
    chainer.cuda.get_device_from_id(0).use()  # Make GPU 0 the current device
    sym.to_gpu()  # Copy the model parameters to the current GPU

CPU times: user 257 ms, sys: 232 ms, total: 488 ms
Wall time: 513 ms


In [8]:
%%time
# Build the momentum-SGD optimizer and attach it to the model's parameters.
optimizer = init_model(sym)

CPU times: user 100 µs, sys: 0 ns, total: 100 µs
Wall time: 103 µs


In [9]:
# Metrics collected by this notebook; written to a JSON file at the end.
results = dict()
with Timer() as t:
    for j in range(EPOCHS):
        for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
            # Move the minibatch to the GPU only when the model was moved there
            # too (same GPU flag as the model-creation cell); otherwise the
            # NumPy arrays are fed to the CPU model unchanged.
            if GPU:
                data = cuda.to_gpu(data)
                target = cuda.to_gpu(target)
            # Forward pass and loss on the raw class scores.
            output = sym(data)
            loss = F.softmax_cross_entropy(output, target)
            # Clear gradients left from the previous step, backpropagate, then
            # let the optimizer apply the update.
            sym.cleargrads()
            loss.backward()
            optimizer.update()
        # Log
        print(j)
# t.interval is read after the with-block; presumably Timer (common.utils)
# sets it on exit -- TODO confirm.
print('Training took %.03f sec.' % t.interval)
results['training duration'] = t.interval

0
1
2
3
4
5
6
7
8
9
CPU times: user 2min 40s, sys: 1.68 s, total: 2min 42s
Wall time: 2min 42s


In [10]:
%%time
# Only whole minibatches are scored, so the prediction buffer is sized to the
# largest multiple of BATCHSIZE that fits in the test set.
# NOTE(review): this assumes yield_mb drops a trailing partial batch -- confirm.
n_samples = (y_test.shape[0] // BATCHSIZE) * BATCHSIZE
# Builtin int replaces np.int, which was only an alias for it and has been
# removed from recent NumPy releases.
y_guess = np.zeros(n_samples, dtype=int)
y_truth = y_test[:n_samples]
c = 0

# Inference mode: train=False and no backprop graph is built.
with chainer.using_config('train', False), chainer.using_config('enable_backprop', False):
    # target is not used here; predictions are compared against y_truth later.
    for data, target in yield_mb(x_test, y_test, BATCHSIZE):
        # Feed the batch from the device the model lives on (same GPU flag as
        # the model-creation cell).
        if GPU:
            data = cuda.to_gpu(data)
        # Forwards: highest-scoring class per sample, brought back to the host.
        pred = cuda.to_cpu(sym(data).data.argmax(-1))
        # Collect results
        y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred
        c += 1

CPU times: user 1.74 s, sys: 32.1 ms, total: 1.77 s
Wall time: 1.77 s


In [11]:
# Fraction of evaluated test samples whose predicted class equals the label.
acc = np.mean(y_guess == y_truth)
print("Accuracy: ", acc)
# Record the accuracy itself (not the training timer) under the 'accuracy'
# key; float() stores a plain Python number for the JSON dump.
results['accuracy'] = float(acc)

Accuracy:  0.794070512821


In [None]:
# Name the results file after the GPU model: second word of the first GPU
# name, cut at the first '-' (e.g. 'Tesla K80' -> chainer_cnn_K80.json).
gpu_model = get_gpu_name()[0].split(' ')[1].split('-')[0]
with open('chainer_cnn_{}.json'.format(gpu_model), 'w') as f:
    json.dump(results, f)