In [1]:
import numpy as np
import sys

sys.path.append('../src/')

from nodecore import Node
from nodecore import Variable

In [34]:
class Linear:
    """Bias-free fully connected layer computing ``y = x W^T``.

    The weight ``W`` is a ``Variable`` wrapping a float32 array of shape
    ``(out_ch, in_ch)`` whose entries are drawn from a normal distribution
    with mean 0 and standard deviation ``1 / out_ch``.
    """

    def __init__(self, in_ch, out_ch):
        weight_shape = (out_ch, in_ch)
        init_std = 1 / out_ch
        initial_W = np.random.normal(0, init_std, weight_shape)
        self.W = Variable(initial_W.astype('f'))

    def __call__(self, x):
        """Shorthand for ``forward(x)``."""
        return self.forward(x)

    def forward(self, x):
        """Create the output node for ``x`` and remember input and output.

        Both are stored on the layer so that ``backward`` can reach them.
        """
        self.x = x
        self.y = Node(linear_forward, [x, self.W])
        return self.y

    def backward(self):
        """Attach gradient nodes to ``W`` and to the stored input.

        ``self.y.grad`` must already be set. Existing ``grad`` attributes on
        ``W`` and on the input are replaced, not accumulated.
        """
        upstream = self.y.grad
        self.W.grad = Node(linear_backward_W, [self.x, upstream])
        self.x.grad = Node(linear_backward_x, [self.W, upstream])

def linear_forward(x, W):
    """Return the matrix product of ``x`` with the transpose of ``W``."""
    return np.matmul(x, W.T)

def linear_backward_W(x, gy):
    """Return the weight gradient ``gy^T x`` of the linear layer."""
    return np.matmul(gy.T, x)

def linear_backward_x(W, gy):
    """Return the input gradient ``gy W`` of the linear layer."""
    return np.matmul(gy, W)

In [37]:
class Add(object):
    """Addition ``y = a + b`` as a graph operation."""

    def forward(self, a, b):
        """Store both operands and return the node holding ``a + b``."""
        self.a = a
        self.b = b
        self.y = Node(add_forward, [a, b])
        return self.y

    def backward(self):
        """Attach gradient nodes to the operands.

        ``self.y.grad`` must already be set. When both operands are the very
        same object (``add(v, v)``), two separate assignments would leave
        only the second contribution on ``v``; in that case a single node
        carrying the sum of both contributions is attached instead.

        Gradients are still assigned rather than accumulated, so a ``grad``
        previously written to an operand by another operation is replaced.
        """
        if self.a is self.b:
            self.a.grad = Node(add_backward_same_operand, [self.y.grad])
        else:
            self.a.grad = Node(add_backward, [self.y.grad])
            self.b.grad = Node(add_backward, [self.y.grad])

    def __call__(self, a, b):
        """Shorthand for ``forward(a, b)``."""
        return self.forward(a, b)


def add_backward_same_operand(gy):
    """Gradient of ``a + a`` with respect to ``a``: ``gy`` from each slot."""
    return gy + gy

def add_forward(a, b):
    """Return ``a + b``."""
    total = a + b
    return total

def add_backward(gy):
    """Pass the upstream gradient through unchanged."""
    upstream = gy
    return upstream

def add(a, b):
    """Create a fresh ``Add`` operation and apply it to ``a`` and ``b``."""
    return Add()(a, b)

In [38]:
class Sub(object):
    """Subtraction ``y = a - b`` as a graph operation."""

    def forward(self, a, b):
        """Store both operands and return the node holding ``a - b``."""
        self.a = a
        self.b = b
        self.y = Node(sub_forward, [a, b])
        return self.y

    def backward(self):
        """Attach gradient nodes to the operands.

        ``self.y.grad`` must already be set. When both operands are the very
        same object (``v - v``), two separate assignments would leave only
        the ``-gy`` contribution on ``v``; in that case a single node
        carrying the sum of both contributions (zero) is attached instead.

        Gradients are still assigned rather than accumulated, so a ``grad``
        previously written to an operand by another operation is replaced.
        """
        if self.a is self.b:
            self.a.grad = Node(sub_backward_same_operand, [self.y.grad])
        else:
            self.a.grad = Node(sub_backward_a, [self.y.grad])
            self.b.grad = Node(sub_backward_b, [self.y.grad])

    def __call__(self, a, b):
        """Shorthand for ``forward(a, b)``."""
        return self.forward(a, b)


def sub_backward_same_operand(gy):
    """Gradient of ``a - a`` with respect to ``a``: ``gy`` plus ``-gy``."""
    return gy - gy

def sub_forward(a, b):
    """Return ``a - b``."""
    difference = a - b
    return difference

def sub_backward_a(gy):
    """Gradient for the minuend: the upstream gradient, unchanged."""
    upstream = gy
    return upstream

def sub_backward_b(gy):
    """Gradient for the subtrahend: the negated upstream gradient."""
    negated = -gy
    return negated

In [39]:
class Mul(object):
    """Element-wise product ``y = a * b`` as a graph operation."""

    def forward(self, a, b):
        """Store both operands and return the node holding ``a * b``."""
        self.a = a
        self.b = b
        self.y = Node(mul_forward, [a, b])
        return self.y

    def backward(self):
        """Attach gradient nodes to the operands.

        ``self.y.grad`` must already be set. When both operands are the very
        same object (``mul(v, v)``, i.e. squaring), two separate assignments
        would leave only one of the two contributions on ``v`` and halve the
        gradient; in that case a single node carrying ``2 * v * gy`` is
        attached instead.

        Gradients are still assigned rather than accumulated, so a ``grad``
        previously written to an operand by another operation is replaced.
        """
        if self.a is self.b:
            self.a.grad = Node(mul_backward_same_operand, [self.a, self.y.grad])
        else:
            self.a.grad = Node(mul_backward, [self.b, self.y.grad])
            self.b.grad = Node(mul_backward, [self.a, self.y.grad])

    def __call__(self, a, b):
        """Shorthand for ``forward(a, b)``."""
        return self.forward(a, b)


def mul_backward_same_operand(x, gy):
    """Gradient of ``x * x`` with respect to ``x``: ``x * gy`` from each slot."""
    return 2 * x * gy

def mul_forward(a, b):
    """Return ``a * b``."""
    product = a * b
    return product

def mul_backward(x, gy):
    """Return ``x * gy``: the other operand scaled by the upstream gradient."""
    scaled = x * gy
    return scaled

In [40]:
class Sum:
    """Reduction of all elements of ``x`` to their sum, as a graph operation."""

    def __call__(self, x):
        """Shorthand for ``forward(x)``."""
        return self.forward(x)

    def forward(self, x):
        """Store the input and return the node holding its sum."""
        self.x = x
        self.y = Node(sum_forward, [x])
        return self.y

    def backward(self):
        """Attach a gradient node to the stored input.

        ``self.y.grad`` must already be set. An existing ``grad`` on the
        input is replaced, not accumulated.
        """
        source = self.x
        source.grad = Node(sum_backward, [source, self.y.grad])

def sum_forward(x):
    """Return the sum over all elements of ``x``."""
    total = np.sum(x)
    return total

def sum_backward(x, gy):
    """Spread the upstream gradient ``gy`` over an array shaped like ``x``."""
    ones = np.ones_like(x)
    return ones * gy

In [62]:
# Three stacked linear layers mapping 5 -> 10 -> 10 -> 3 features.
l0 = Linear( 5, 10)
l1 = Linear(10, 10)
l2 = Linear(10,  3)
# Operation objects used to build the loss from the network output.
sub3 = Sub()
mul4 = Mul()
sum5 = Sum()

In [42]:
# Ground-truth coefficients: a random (3, 5) float32 matrix used to generate the targets.
W = np.random.random((3, 5)).astype(np.float32)

In [51]:
batchsize = 8
# Random inputs of shape (batchsize, 5).
x = Variable(np.random.random((batchsize, 5)).astype(np.float32))
# Targets produced by the ground-truth linear map: t = x W^T.
t = Variable(np.matmul(x.data, W.T))

In [52]:
# Forward pass through the three linear layers.
h0 = l0(x)
h1 = l1(h0)
y  = l2(h1)

# Loss: sum over all elements of the squared difference (y - t)^2.
dif_y_t = sub3(y, t)
loss = sum5(mul4(dif_y_t, dif_y_t))

In [53]:
# Seed the backward pass: the gradient of the loss with respect to itself is 1.
loss.grad = Variable(1)

In [59]:
# Run the backward step of every operation, from the loss back to the first
# layer (the reverse of the order in which they were applied).
for op in (sum5, mul4, sub3, l2, l1, l0):
    op.backward()

In [61]:
# Inspect the gradient attached to the first layer's weight.
l0.W.grad.data

array([[-0.17448576, -0.12396366, -0.17368382, -0.21801567, -0.13547616],
       [ 0.23901932,  0.1784434 ,  0.28673586,  0.3453216 ,  0.27966982],
       [ 0.18470582,  0.12594897,  0.16575038,  0.19146392,  0.22489803],
       [ 0.4409402 ,  0.33127028,  0.54326725,  0.6459553 ,  0.568679  ],
       [ 0.21001351,  0.15537557,  0.24353132,  0.2961584 ,  0.22539008],
       [ 0.98166025,  0.70743775,  1.0592948 ,  1.2572393 ,  1.173618  ],
       [-0.26472995, -0.19528773, -0.30999815, -0.36450174, -0.35181075],
       [-1.657927  , -1.1968415 , -1.7898829 , -2.1443279 , -1.8770658 ],
       [-0.4020802 , -0.29051042, -0.43778646, -0.51931304, -0.4849494 ],
       [-0.94823027, -0.683903  , -1.0218748 , -1.2215724 , -1.0864532 ]],
      dtype=float32)

## 学習テスト

In [67]:
lr = 0.001

for step in range(100):
    # Draw a fresh random batch and its targets from the ground-truth map.
    x = Variable(np.random.random((batchsize, 5)).astype(np.float32))
    t = Variable(np.matmul(x.data, W.T))

    # Forward pass through the three linear layers.
    h0 = l0(x)
    h1 = l1(h0)
    y  = l2(h1)

    # Sum-of-squares loss between prediction and target.
    dif_y_t = sub3(y, t)
    loss = sum5(mul4(dif_y_t, dif_y_t))

    # Backward pass: seed the loss gradient, then walk the operations in
    # reverse order of application.
    loss.grad = Variable(1)
    for op in (sum5, mul4, sub3, l2, l1, l0):
        op.backward()

    # Plain gradient-descent step on every weight matrix, in place.
    for layer in (l0, l1, l2):
        layer.W.data -= lr * layer.W.grad.data

    print(loss.data)

0.43622115
0.34013852
0.27952713
0.46903676
0.38006333
0.43638194
0.2947218
0.50222605
0.38708603
0.28479886
0.5035801
0.4812018
0.48898917
0.3695941
0.3453398
0.39792928
0.37899005
0.46050546
0.46633
0.3511302
0.63206553
0.49406764
0.4064721
0.28691635
0.4381137
0.5857443
0.38138482
0.48468617
0.45428178
0.28868562
0.31658155
0.2790502
0.32902244
0.35131475
0.5700266
0.42452258
0.42105484
0.38461855
0.22674334
0.25425643
0.35525084
0.38023925
0.35353464
0.26514643
0.41488856
0.48902184
0.3090096
0.3125139
0.36662334
0.36845058
0.40047967
0.29440707
0.29390767
0.39879692
0.36840886
0.26612264
0.3250206
0.40592158
0.4503349
0.31340742
0.35602528
0.38381612
0.25926873
0.17217898
0.4756266
0.2098824
0.36401635
0.45687523
0.34307975
0.43184093
0.46871236
0.39925766
0.23448831
0.29725498
0.2907148
0.24454471
0.30848056
0.45815206
0.2201158
0.24983609
0.20530328
0.5917789
0.25915045
0.456494
0.33413425
0.3958828
0.26985157
0.23671898
0.22187178
0.336352
0.3454002
0.28728074
0.26518518
0.4497

In [68]:
# Show the most recent predictions next to their targets.
y.data, t.data

(array([[1.5498765 , 1.1681993 , 1.0430547 ],
        [1.6770533 , 1.2405598 , 1.0832026 ],
        [1.7349929 , 1.2710277 , 1.1185577 ],
        [2.0274627 , 1.5346563 , 1.3645009 ],
        [1.4642233 , 1.099669  , 0.9885646 ],
        [1.3707267 , 1.0048237 , 0.86772615],
        [2.4010842 , 1.8111888 , 1.6130209 ],
        [1.9897652 , 1.4677737 , 1.2769117 ]], dtype=float32),
 array([[1.7393267, 1.1519265, 0.8866402],
        [1.797891 , 1.4282349, 1.2312665],
        [1.6633149, 1.0550609, 1.2571576],
        [2.1259358, 1.7245108, 1.2852108],
        [1.5034199, 1.226975 , 1.1889428],
        [1.4598958, 1.1640102, 1.0277932],
        [2.3565445, 1.9576244, 1.6499039],
        [2.0206528, 1.3398588, 1.1761358]], dtype=float32))

TODO: 各 `backward()` では `W.grad = Node(backward, [...])` のように勾配を代入しているが、同じ変数が複数の箇所から参照される場合、後から代入した勾配が前の勾配を上書きしてしまう。そのため、勾配は代入ではなく加算(累積)するように修正する必要がある。