In [1]:
import sys
import numpy as np
import cupy as cp

sys.path.append('../')

import mandala
from mandala import Node
from mandala import Variable

import mandala.autodiff as ad
import mandala.autodiff.functions as F

cp.cuda.Device(1).use()

In [2]:
class Model(ad.Graph):
    def __init__(self):
        super(Model, self).__init__()

        self.l0 = F.Linear(   5, 1000)
        self.l1 = F.Linear(1000, 1000)
        self.l2 = F.Linear(1000, 1000)
        self.l3 = F.Linear(1000, 1000)
        self.l4 = F.Linear(1000, 1000)
        self.l5 = F.Linear(1000, 1000)
        self.l6 = F.Linear(1000,    3)

    def __call__(self, x):
        h = F.relu(self.l0(x))
        h = F.relu(self.l1(h))
        h = F.relu(self.l2(h))
        h = F.relu(self.l3(h))
        h = F.relu(self.l4(h))
        h = F.relu(self.l5(h))
        y = F.relu(self.l6(h))
        return y

In [3]:
xp = cp

In [4]:
model = Model()
model.to_gpu()

In [5]:
W = xp.arange(15, dtype=np.float32).reshape(3, 5)
b = xp.arange(3, dtype=np.float32)

In [6]:
model.subgraphs

{'l0': <mandala.autodiff.functions.linear.Linear at 0x7fe1e685b208>,
 'l1': <mandala.autodiff.functions.linear.Linear at 0x7fe1e685b860>,
 'l2': <mandala.autodiff.functions.linear.Linear at 0x7fe1e685ba20>,
 'l3': <mandala.autodiff.functions.linear.Linear at 0x7fe1e685bf98>,
 'l4': <mandala.autodiff.functions.linear.Linear at 0x7fe1e685bfd0>,
 'l5': <mandala.autodiff.functions.linear.Linear at 0x7fe1e685b6d8>,
 'l6': <mandala.autodiff.functions.linear.Linear at 0x7fe1e685b550>}

## Test

In [7]:
import time

In [15]:
batch_size = 1024

In [16]:
model.l1.b.data.device

<CUDA Device 1>

In [17]:
s = time.time()
lr = 1e-4

for i in range(1000):
    # make batch
    x = Variable(xp.random.random((batch_size, 5)).astype(np.float32))
    t = Variable(xp.matmul(x.data, W.T) + b)
    
    # forward
    y  = model(x)
    loss = (y - t) ** 2 / batch_size

    # loss
    loss = F.sum((y - t) ** 2) / batch_size

    # backward
    model.cleargrads()
    loss.backward()

    # update
    for p in model.params.values():
        p.grad.reserve()

    for p in model.params.values():
        p.data -= lr * p.grad.data

    #print(loss.data)



print('time:', time.time() - s)

time: 4.973711252212524


In [18]:
p.__class__

mandala.nodecore.Variable

In [19]:
def check_memory(start):
    if start._data is not None:
        print(start.__class__, start.shape)
    for node in start.input_nodes:
        check_memory(node)

In [20]:
check_memory(x.grad)

<class 'mandala.nodecore.Variable'> (1000, 5)
<class 'mandala.nodecore.Variable'> (1024, 5)
<class 'mandala.nodecore.Variable'> (1000, 5)
<class 'mandala.nodecore.Variable'> (1000,)
<class 'mandala.nodecore.Variable'> (1024, 5)
<class 'mandala.nodecore.Variable'> (1000, 5)
<class 'mandala.nodecore.Variable'> (1000,)
<class 'mandala.nodecore.Variable'> (1000, 1000)
<class 'mandala.nodecore.Variable'> (1024, 5)
<class 'mandala.nodecore.Variable'> (1000, 5)
<class 'mandala.nodecore.Variable'> (1000,)
<class 'mandala.nodecore.Variable'> (1000, 1000)
<class 'mandala.nodecore.Variable'> (1000,)
<class 'mandala.nodecore.Variable'> (1024, 5)
<class 'mandala.nodecore.Variable'> (1000, 5)
<class 'mandala.nodecore.Variable'> (1000,)
<class 'mandala.nodecore.Variable'> (1000, 1000)
<class 'mandala.nodecore.Variable'> (1000,)
<class 'mandala.nodecore.Variable'> (1000, 1000)
<class 'mandala.nodecore.Variable'> (1024, 5)
<class 'mandala.nodecore.Variable'> (1000, 5)
<class 'mandala.nodecore.Variable'

AttributeError: 'int' object has no attribute 'shape'

In [21]:
%who

F	 Model	 Node	 Variable	 W	 ad	 b	 batch_size	 check_memory	 
cp	 i	 loss	 lr	 mandala	 model	 np	 p	 s	 
sys	 t	 time	 x	 xp	 y	 


## Chainer との速度比較

In [22]:
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizer

In [23]:
cp.cuda.Device(0).use()

In [24]:
W = xp.arange(15, dtype=np.float32).reshape(3, 5)
b = xp.arange(3, dtype=np.float32)

In [25]:
class Model(chainer.Chain):
    def __init__(self):
        super().__init__()
        with self.init_scope():
            self.l0 = L.Linear(   5, 1000)
            self.l1 = L.Linear(1000, 1000)
            self.l2 = L.Linear(1000, 1000)
            self.l3 = L.Linear(1000, 1000)
            self.l4 = L.Linear(1000, 1000)
            self.l5 = L.Linear(1000, 1000)
            self.l6 = L.Linear(1000,    3)

    def __call__(self, x):
        h  = F.relu(self.l0(x))
        h  = F.relu(self.l1(h))
        h  = F.relu(self.l2(h))
        h  = F.relu(self.l3(h))
        h  = F.relu(self.l4(h))
        h  = F.relu(self.l5(h))
        y  = self.l6(h)
        return y

In [26]:
model = Model()
model.to_gpu()

<__main__.Model at 0x7fe193e1f668>

In [27]:
opt = chainer.optimizers.SGD(lr=1e-4)
opt.setup(model)

<chainer.optimizers.sgd.SGD at 0x7fe193e35390>

In [28]:
batch_size = 1024
s = time.time()

for i in range(1000):
    # make batch
    x = xp.random.random((batch_size, 5)).astype(np.float32)
    t = xp.matmul(x, W.T) + b
    
    # forward
    y = model(x)

    # loss
    loss = F.mean_squared_error(y, t)

    # backward
    model.cleargrads()
    loss.backward(retain_grad=True)
    
    # update
    opt.update()

    # print(loss.data)

print(time.time() - s)

5.682805061340332


In [33]:
model.l0.W.data

array([[ 0.847817  ,  0.47937623, -0.00406083,  0.39935032,  0.00958933],
       [-0.03072025,  0.10420427,  0.84563243,  0.634149  , -0.66221374],
       [ 0.62243724,  0.10377856,  0.00802623, -0.2549301 , -0.14632824],
       ...,
       [-0.02014274, -0.02957321,  0.2109989 , -0.45022723, -0.38983467],
       [ 0.27564514, -0.7606694 ,  0.1707307 , -0.75222045, -0.51158404],
       [-0.04614544,  0.86378103, -0.01214676,  0.35443145,  0.00616781]],
      dtype=float32)

In [30]:
def hoge(x):
    pass

In [24]:
print (hoge(0))

None


In [25]:
y is None

False

In [26]:
y = None

In [27]:
print(y)

None
