In [17]:
import sys
import numpy as np
import cupy as cp

sys.path.append('../')

import mandala
from mandala import Node
from mandala import Variable

import mandala.autodiff as ad
import mandala.autodiff.functions as F

In [18]:
class Model(ad.Graph):
    """Fully connected regression network built from mandala autodiff layers.

    Architecture: 5 inputs -> six hidden Linear layers of width 1000, each
    followed by ReLU -> a final Linear layer producing 3 outputs.
    """

    def __init__(self):
        super(Model, self).__init__()

        self.l0 = F.Linear(   5, 1000)
        self.l1 = F.Linear(1000, 1000)
        self.l2 = F.Linear(1000, 1000)
        self.l3 = F.Linear(1000, 1000)
        self.l4 = F.Linear(1000, 1000)
        self.l5 = F.Linear(1000, 1000)
        self.l6 = F.Linear(1000,    3)

    def __call__(self, x):
        """Run the forward pass on `x` and return the output of the last layer."""
        h = F.relu(self.l0(x))
        h = F.relu(self.l1(h))
        h = F.relu(self.l2(h))
        h = F.relu(self.l3(h))
        h = F.relu(self.l4(h))
        h = F.relu(self.l5(h))
        # The output layer is left linear: this is a regression head, and a
        # ReLU here would zero the gradient whenever the pre-activation is
        # negative. It also keeps the network identical to the Chainer model
        # it is benchmarked against.
        y = self.l6(h)
        return y

In [19]:
# Array module used by the cells below; bound to CuPy here.
xp = cp

In [20]:
# Instantiate the network and call its to_gpu() method before training.
model = Model()
model.to_gpu()

In [21]:
# Ground-truth coefficients: the training targets are generated as
# x @ W.T + b, so the network is asked to recover this affine map.
W = xp.arange(15, dtype=np.float32).reshape(3, 5)  # weight matrix, shape (3, 5)
b = xp.arange(3, dtype=np.float32)                 # bias vector, shape (3,)

In [27]:
# Show the model's `subgraphs` attribute (the layers assigned in __init__).
model.subgraphs

{'l0': <mandala.autodiff.functions.linear.Linear at 0x7fedbdad0470>,
 'l1': <mandala.autodiff.functions.linear.Linear at 0x7fedbdad0588>,
 'l2': <mandala.autodiff.functions.linear.Linear at 0x7fedbdad0630>,
 'l3': <mandala.autodiff.functions.linear.Linear at 0x7fedbdad0c18>,
 'l4': <mandala.autodiff.functions.linear.Linear at 0x7fedbdad06a0>,
 'l5': <mandala.autodiff.functions.linear.Linear at 0x7fedbdb31550>,
 'l6': <mandala.autodiff.functions.linear.Linear at 0x7fedbdac4358>}

## Test

In [23]:
import time

In [24]:
# Number of random samples drawn for each training iteration.
batch_size = 32

In [25]:
# Time 100 SGD steps of the mandala model on randomly generated batches.
s = time.time()
lr = 1e-4

for i in range(100):
    # make batch: random float32 inputs and targets from the true affine map
    x = Variable(xp.random.random((batch_size, 5)).astype(np.float32))
    t = Variable(xp.matmul(x.data, W.T) + b)

    # forward
    y = model(x)

    # loss: squared error summed over all elements, divided by the batch size.
    # (Computed exactly once; a redundant intermediate loss would add graph
    # nodes and GPU work to the timed region.)
    loss = F.sum((y - t) ** 2) / batch_size

    # backward
    model.cleargrads()
    loss.backward()

    # update: plain SGD step applied in place to every parameter
    for p in model.params.values():
        p.data -= lr * p.grad.data

    print(loss.data)

print('time:', time.time() - s)

1465.1697
1377.5023
1255.2747
987.88574
1151.0443
330.19012
40.940514
7.5541234
2.5024438
2.096587
2.059877
3.3868232
4.651155
5.734512
4.041678
4.722045
3.417812
3.6086245
4.848629
7.400386
8.822976
10.159074
6.4284763
7.433393
7.2929983
8.142021
5.255807
6.1913633
3.960888
6.2092295
6.3371754
6.853778
5.6145644
9.076102
12.471974
20.838785
19.754498
22.927917
12.53281
18.66572
15.206768
17.087418
7.726942
7.9481115
5.811977
8.9317875
5.741173
6.0945187
5.6697397
4.3192625
2.5454612
1.3391639
0.5194106
0.67949134
0.5272204
0.5922139
0.73404396
0.6962881
0.8274764
0.8132
0.8734376
0.5183546
0.9371854
0.9192647
0.59785795
0.58753717
0.96124786
0.7866516
1.0506904
1.2100782
0.54250336
0.46345323
0.30857486
0.6757778
0.52989686
0.41163024
0.32805687
0.44030672
0.5382209
0.4477445
0.27252334
0.5257319
0.62917054
0.6507886
0.48263493
0.4788032
0.52230823
0.37600467
0.36755335
0.40876725
0.3687814
0.29227737
0.44269508
0.41219383
0.5623789
0.5061917
0.41838634
0.42867166
0.54501075
0.5435853

## Chainer との速度比較

In [10]:
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizer

  from ._conv import register_converters as _register_converters


In [11]:
class Model(chainer.Chain):
    """Chainer counterpart of the benchmark network.

    Six ReLU-activated Linear layers of width 1000 on top of a 5-dimensional
    input, followed by a linear 3-unit output layer.
    """

    def __init__(self):
        super().__init__()
        width = 1000
        with self.init_scope():
            self.l0 = L.Linear(5, width)
            self.l1 = L.Linear(width, width)
            self.l2 = L.Linear(width, width)
            self.l3 = L.Linear(width, width)
            self.l4 = L.Linear(width, width)
            self.l5 = L.Linear(width, width)
            self.l6 = L.Linear(width, 3)

    def __call__(self, x):
        """Apply the hidden layers with ReLU, then the linear output layer."""
        hidden_layers = (self.l0, self.l1, self.l2, self.l3, self.l4, self.l5)
        h = x
        for layer in hidden_layers:
            h = F.relu(layer(h))
        return self.l6(h)

In [12]:
# Instantiate the Chainer network and call its to_gpu() method.
model = Model()
model.to_gpu()

<__main__.Model at 0x7fedbdba22b0>

In [13]:
# SGD optimizer with learning rate 1e-4, set up on the Chainer model.
opt = chainer.optimizers.SGD(lr=1e-4)
opt.setup(model)

<chainer.optimizers.sgd.SGD at 0x7fedbdac43c8>

In [14]:
# Time 100 SGD steps of the Chainer model on randomly generated batches.
batchsize = 32
n_steps = 100
batch_shape = (batchsize, 5)
s = time.time()

for i in range(n_steps):
    # Draw a random float32 batch and build its targets from the true map.
    x = xp.random.random(batch_shape).astype(np.float32)
    t = x @ W.T + b

    # Forward pass followed by the mean-squared-error loss.
    y = model(x)
    loss = F.mean_squared_error(y, t)

    # Clear stale gradients, backpropagate, and let the optimizer step.
    model.cleargrads()
    loss.backward()
    opt.update()

    print(loss.data)

print(time.time() - s)

469.14413
560.5114
479.89108
439.02774
398.0128
486.67892
434.71338
498.61557
513.89453
501.6971
464.2211
433.64822
416.07434
536.9349
477.8173
489.48837
494.19727
401.93628
445.39432
480.13217
506.47742
522.77313
410.80103
457.75757
399.5559
450.61612
478.848
448.16895
414.1567
448.49265
431.42007
347.9515
425.59286
334.4722
373.50952
317.1681
303.7303
278.90637
282.18857
225.18282
210.00865
220.16206
161.43806
116.8025
92.416954
49.78757
39.59161
19.688545
12.267301
6.3309293
1.9553394
1.6218678
1.7208099
1.29629
1.7037927
1.169364
1.2506849
1.7126869
1.150827
1.1362349
1.4127616
1.4066372
0.7393283
1.1587137
1.1940593
0.859771
1.1856561
1.0925846
1.0842143
1.4197389
1.2192324
1.1245869
1.1180047
1.3830324
0.8384678
0.9767073
1.4450407
1.3833526
0.8328821
0.7093623
0.86091423
1.1820178
1.1244105
0.9225852
1.2034284
1.129447
0.7484564
1.0831754
0.98497605
0.933878
1.3950139
1.0491279
0.9912393
0.975163
0.62471503
1.0899663
0.8614957
0.78640395
1.1347501
0.78920263
0.6778316497802734
