# Neural Networks
## Prepare Framework
- 適当にニューラルネットを用意する

In [750]:
import numpy as np
import math

In [1527]:
class ReLu(object):

    def __init__(self):
        self.layer_type = 'activation'
        
    def forward(self, x):
        self.x = x
        return np.maximum(x, 0, dtype=x.dtype)
    
    def backward(self, gy):
        return gy * (self.x > 0)

class Linear(object):
    
    def __init__(self, inputs, outputs):
        self.layer_type = 'linear'
        self.W = np.random.uniform(-1/math.sqrt(inputs), 1/math.sqrt(inputs), (outputs, inputs)).astype('f')
        self.b = np.zeros((outputs), dtype=np.float32)
        
    def forward(self, x):
        self.x = x
        x = x.reshape(x.shape[0], -1)
        y = x.dot(self.W.T) + self.b
        return y
    
    def backward(self, gy):
        x = self.x.reshape(self.x.shape[0], -1)
        gx = gy.dot(self.W).reshape(self.x.shape)
        self.gW = gy.T.dot(x)
        self.gb = gy.sum(0)
        return gx.reshape(self.x.shape)
    
class Convolution2D(object):
    
    def __init__(self, in_ch, out_ch, k, stride=1, pad=1):
        self.layer_type = 'convolution'
        self.ksize = k
        self.out_ch = out_ch
        self.stride = stride
        self.pad = pad
        self.W = np.random.uniform(-1/math.sqrt(k*k*in_ch), 1/math.sqrt(k*k*in_ch), (out_ch, in_ch, k, k)).astype('f')
        self.b = np.zeros((out_ch), dtype=np.float32)
        
    def forward(self, x):
        self.x = x
        b, ch, h, w = x.shape
        p = self.pad
        k = self.ksize
        s = self.stride
        
        #padding input image
        _x = np.zeros((b, ch, (h + p*2), (w + p*2)), dtype=np.float32)
        _x[:, :, p:-p, p:-p] = x
        
        #im2col
        self.col = np.zeros((b, ch, k, k, ((h + p*2 - k)//s + 1), ((w + p*2 - k)//s + 1)), dtype=np.float32)
        for i in range(0, h + p*2 - k + 1, s):
            for j in range(0, w + p*2 - k + 1, s):
                self.col[:, :, :, :, i/s, j/s] += _x[:, :, i:i+k, j:j+k]
        
        #convolution
        y = np.tensordot(self.col, self.W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
        y += self.b
        return np.rollaxis(y, 3, 1)
    
    def backward(self, gy):
        self.gW = np.tensordot(gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(self.W.dtype, copy=False)
        self.gb = gy.sum(axis=(0, 2, 3))
        gcol = np.tensordot(self.W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = np.rollaxis(gcol, 3)
        
        #col2im
        b, ch, h, w = self.x.shape
        p = self.pad
        k = self.ksize
        s = self.stride
        gx = np.zeros((b, ch, (h + p*2), (w + p*2)), dtype=np.float32)
        for i in range(0, h + p*2 - k + 1, s):
            for j in range(0, w + p*2 - k + 1, s):
                 gx[:, :, i:i+k, j:j+k] += gcol[:, :, :, :, i/s, j/s]
        return gx[:, :, p:-p, p:-p]
    
def softmax(x):
    x -= x.max(axis=1, keepdims=True)
    exp_x = np.exp(x)
    return exp_x/np.sum(exp_x, axis=1).reshape(-1, 1)

def softmax_cross_entropy(x, t):
    log_y = np.log(softmax(x))
    log_p = log_y[range(len(t)), t.ravel()] #Labelに対応する値が1になる→log(y)=0．不正解Labelに対して期待される確率は0であるからそれらは無視できる．
    loss = - log_p.sum() / len(t)
    
    gx = np.exp(log_y)
    gx[range(len(t)), t.ravel()] -= 1
    gx *= loss
    
    return loss, gx

def accuracy(x, t):
    t_or_f = (np.argmax(x, axis=1)==t).astype('f')
    return np.sum(t_or_f)/len(t_or_f)

In [1528]:
def forward(x):
    for l in nnet:
        x = l.forward(x)
    return x

def backward(gy):
    for l in nnet[::-1]:
        gy = l.backward(gy)

In [1534]:
def update():
    lr = 0.001
    for l in nnet:
        if l.layer_type is not 'activation':
            l.W -= l.gW * lr
            l.b -= l.gb * lr

## Training Test
- mnistで試してみる

In [1517]:
%matplotlib inline
import pylab as plt

In [1518]:
import chainer
train, test = chainer.datasets.get_mnist()

In [1519]:
X = np.zeros((60000, 1, 28, 28), dtype=np.float32)
Y = np.zeros(60000, dtype=np.int32)
for i in range(60000):
    X[i] += train[i][0].reshape(1, 28, 28)
    Y[i] = train[i][1]
X -= 0.5

In [1529]:
c0 = Convolution2D(1, 16, 4, stride=2, pad=1)
c1 = Convolution2D(16, 32, 3, stride=1, pad=1)
c2 = Convolution2D(32, 64, 4, stride=2, pad=1)
c3 = Convolution2D(64, 128, 3, stride=2, pad=1)
l4 = Linear(128*4*4, 256)
l5 = Linear(256, 10)
nnet=[c0,
      ReLu(),
      c1,
      ReLu(),
      c2,
      ReLu(),
      c3,
      ReLu(),
      l4,
      ReLu(),
      l5]

In [None]:
epoch = 100
N = len(X)
batchsize = 32

for e in range(epoch):
    sum_loss = 0.
    sum_acc = 0.
    perm = np.random.permutation(N)
    for i in range(0, N, batchsize):
        y = forward(X[perm[i:i+batchsize]])
        loss, gy = softmax_cross_entropy(y, Y[perm[i:i+batchsize]])
        acc = accuracy(y, Y[perm[i:i+batchsize]])
        backward(gy)
        update()
        sum_loss += loss
        sum_acc += acc
        if i%(32*100)==0:
            print sum_loss/100., sum_acc/100.
            sum_loss = 0.
            sum_acc = 0.

0.0229326915741 0.0021875
2.2967024231 0.1184375
2.17276879191 0.2146875
0.916777430773 0.718125
0.529901775122 0.82625
0.42349107936 0.87125
0.387935250178 0.87625
0.355191886574 0.8965625
0.344978407845 0.9015625
0.311198208258 0.9025
0.301291378736 0.9053125
0.253335500024 0.924375
0.287057323605 0.9071875
0.263761638887 0.92
0.250971356928 0.9196875
0.236316560432 0.9246875
0.242422135696 0.928125
0.238997259364 0.9290625
0.220191886947 0.9346875
0.00195239886642 0.0090625
0.231725176089 0.9275
0.221418728903 0.9315625
0.181325034909 0.9490625
0.19844683852 0.9353125
0.202445771247 0.9359375
0.181318219453 0.94375
0.194040937833 0.9425
0.172942409553 0.945625
0.18240897391 0.9425
0.191960714879 0.9434375
0.159887112807 0.9534375
0.165365229212 0.94875
0.167279122509 0.9490625
0.168200955205 0.9553125
0.177439857051 0.95
0.162232652716 0.951875
0.173152746186 0.945
0.166966315545 0.9459375
0.00307523369789 0.00875
0.142988890968 0.955625
0.146332728378 0.956875
0.162410121299 0.9478