In [1]:
import numpy as np
import sys

sys.path.append('../src/')

from nodecore import Node
from nodecore import Variable

In [2]:
class Linear(object):
    def __init__(self, in_ch, out_ch):
        self.W = Variable(
            np.random.normal(0, 1 / out_ch, (out_ch, in_ch)).astype('f')
        )
    
    def forward(self, x):
        self.x = x
        self.y = Node(linear_forward, [self.x, self.W])
        return self.y

    def backward(self):
        self.W.grad = Node(linear_backward_W, [self.x, self.y.grad])
        self.x.grad = Node(linear_backward_x, [self.W, self.y.grad])
    
    def __call__(self, x):
        return self.forward(x)

def linear_forward(x, W):
    y = np.matmul(x, W.T)
    return y

def linear_backward_W(x, gy):
    gW = np.matmul(gy.T, x)
    return gW

def linear_backward_x(W, gy):
    gx = np.matmul(gy, W)
    return gx

In [3]:
class Add(object):
    def forward(self, a, b):
        self.a = a
        self.b = b
        self.y = Node(add_forward, [a, b])
        return self.y

    def backward(self):
        self.a.grad = Node(add_backward, [self.y.grad])
        self.b.grad = Node(add_backward, [self.y.grad])

    def __call__(self, a, b):
        return self.forward(a, b)

def add_forward(a, b):
    return a + b

def add_backward(gy):
    return gy

def add(a, b):
    node_add = Add()
    return node_add(a, b)

In [4]:
class Sub(object):
    def forward(self, a, b):
        self.a = a
        self.b = b
        self.y = Node(sub_forward, [a, b])
        return self.y

    def backward(self):
        self.a.grad = Node(sub_backward_a, [self.y.grad])
        self.b.grad = Node(sub_backward_b, [self.y.grad])

    def __call__(self, a, b):
        return self.forward(a, b)

def sub_forward(a, b):
    return a - b

def sub_backward_a(gy):
    return gy

def sub_backward_b(gy):
    return -gy

In [5]:
class Mul(object):
    def forward(self, a, b):
        self.a = a
        self.b = b
        self.y = Node(mul_forward, [a, b])
        return self.y

    def backward(self):
        self.a.grad = Node(mul_backward, [self.b, self.y.grad])
        self.b.grad = Node(mul_backward, [self.a, self.y.grad])

    def __call__(self, a, b):
        return self.forward(a, b)

def mul_forward(a, b):
    return a * b

def mul_backward(x, gy):
    return x * gy

In [6]:
class Sum(object):
    def forward(self, x):
        self.x = x
        self.y = Node(sum_forward, [x])
        return self.y

    def backward(self):
        self.x.grad = Node(sum_backward, [self.x, self.y.grad])

    def __call__(self, x):
        return self.forward(x)

def sum_forward(x):
    return np.sum(x)

def sum_backward(x, gy):
    return np.ones_like(x) * gy

In [7]:
l0 = Linear( 5, 10)
l1 = Linear(10, 10)
l2 = Linear(10,  3)
sub3 = Sub()
mul4 = Mul()
sum5 = Sum()

In [8]:
# 真の係数
W = np.random.random((3, 5)).astype(np.float32)

In [9]:
batchsize = 8
x = Variable(np.random.random((batchsize, 5)).astype(np.float32))
t = Variable(np.matmul(x.data, W.T))

In [10]:
h0 = l0(x)
h1 = l1(h0)
y  = l2(h1)

dif_y_t = sub3(y, t)
loss = sum5(mul4(dif_y_t, dif_y_t))

In [11]:
loss.grad = Variable(1)

In [12]:
sum5.backward()
mul4.backward()
sub3.backward()
l2.backward()
l1.backward()
l0.backward()

In [13]:
l0.W.grad.data

array([[-0.05047692, -0.04583379, -0.02766792, -0.06458288, -0.0548459 ],
       [ 0.5010739 ,  0.39342761,  0.36980706,  0.42696244,  0.44581848],
       [-2.69119263, -2.14639473, -1.93818164, -2.20127654, -2.36037779],
       [ 0.16421615,  0.14169762,  0.1019178 ,  0.15826198,  0.15558778],
       [-0.32672578, -0.27973613, -0.20633459, -0.2968933 , -0.30168769],
       [-1.93166375, -1.53746843, -1.39588189, -1.57882571, -1.693277  ],
       [-1.00775969, -0.79917854, -0.73286432, -0.80766749, -0.876248  ],
       [ 1.4861238 ,  1.15786505,  1.11121058,  1.20547986,  1.29539812],
       [-1.62742686, -1.30167127, -1.16679275, -1.31817961, -1.42242336],
       [-3.43118   , -2.74285316, -2.4622097 , -2.78242159, -3.00011539]], dtype=float32)

## 学習テスト

In [14]:
lr = 0.001

for i in range(100):
    # make batch
    x = Variable(np.random.random((batchsize, 5)).astype(np.float32))
    t = Variable(np.matmul(x.data, W.T))
    
    # forward
    h0 = l0(x)
    h1 = l1(h0)
    y  = l2(h1)

    # loss
    dif_y_t = sub3(y, t)
    loss = sum5(mul4(dif_y_t, dif_y_t))
    
    # backward
    loss.grad = Variable(1)
    
    sum5.backward()
    mul4.backward()
    sub3.backward()
    l2.backward()
    l1.backward()
    l0.backward()
    
    # update
    l0.W.data -= lr * l0.W.grad.data
    l1.W.data -= lr * l1.W.grad.data
    l2.W.data -= lr * l2.W.grad.data
    
    print(loss.data)

51.2562
31.2824
38.1251
41.8289
29.5416
26.2222
38.35
45.0232
25.2931
36.914
24.1995
44.6067
36.4646
31.298
38.0644
35.3944
36.2712
36.2164
22.3982
35.1409
34.2189
25.2897
20.3706
35.6483
29.3707
23.3739
28.814
26.5349
23.0588
29.8155
26.1272
15.6531
18.0121
20.7852
12.6815
20.8896
10.0856
12.4411
17.2221
13.1953
13.4904
13.7674
9.98207
5.43043
8.37379
6.19839
8.18385
10.3069
7.24686
3.90009
5.64527
5.40399
3.25546
2.50586
2.38887
2.55661
4.16
4.2606
1.97739
1.1981
1.81187
2.27201
0.950636
1.52067
1.69456
0.827481
0.698799
1.04792
1.42378
1.60651
1.17064
0.983331
0.915311
0.46753
0.670402
0.704423
0.833394
0.789159
0.90864
0.903969
0.84604
0.826496
0.485861
1.04132
0.897414
0.903577
0.55741
0.831185
0.767628
0.612234
0.428293
0.488028
0.448398
0.657251
1.47951
0.523366
0.511343
0.846223
0.29173
0.689548


In [15]:
y.data, t.data

(array([[ 1.49480021,  1.36799705,  0.991364  ],
        [ 1.59349108,  1.42237818,  1.04920137],
        [ 0.67390949,  0.59562385,  0.43615013],
        [ 1.77337492,  1.6086874 ,  1.17290854],
        [ 1.1251663 ,  1.01930988,  0.75291014],
        [ 1.64332366,  1.47560799,  1.09703469],
        [ 0.91083539,  0.81226569,  0.6011613 ],
        [ 1.47039366,  1.32274354,  0.96522522]], dtype=float32),
 array([[ 1.61357653,  1.77865326,  1.02604902],
        [ 1.63375974,  1.60560286,  0.91914147],
        [ 0.73774105,  0.52081048,  0.4729915 ],
        [ 1.99669564,  2.10242891,  1.18826842],
        [ 1.13131106,  1.07039201,  0.7167778 ],
        [ 1.57323253,  1.52357733,  0.92267299],
        [ 1.04702318,  0.96931267,  0.57307184],
        [ 1.28425932,  1.16649127,  0.90242654]], dtype=float32))

W.grad = Node(backward, [hoge])と代入しているがこれは加算にしないといけない