In [1]:
import sys
import numpy as np
import time

try:
    import cupy as cp
    xp = cp
except:
    xp = np

sys.path.append('../')

import mandala
from mandala import Node
from mandala import Variable

import mandala.autodiff as ad
import mandala.autodiff.functions as F

In [2]:
W = xp.arange(15, dtype=np.float32).reshape(3, 5)
b = xp.arange(3, dtype=np.float32)

In [3]:
batch_size = 128

## Mandala

In [4]:
class Model(ad.Graph):
    def __init__(self):
        super(Model, self).__init__()

        self.l0 = F.Linear(   5, 1000)
        self.l1 = F.Linear(1000, 1000)
        self.l2 = F.Linear(1000, 1000)
        self.l3 = F.Linear(1000, 1000)
        self.l4 = F.Linear(1000, 1000)
        self.l5 = F.Linear(1000, 1000)
        self.l6 = F.Linear(1000,    3)

    def __call__(self, x):
        h = F.relu(self.l0(x))
        h = F.relu(self.l1(h))
        h = F.relu(self.l2(h))
        h = F.relu(self.l3(h))
        h = F.relu(self.l4(h))
        h = F.relu(self.l5(h))
        y = F.relu(self.l6(h))
        return y

In [5]:
model = Model()
if not xp == np:
    model.to_gpu()

In [6]:
model.subgraphs

{'l0': <mandala.autodiff.functions.linear.Linear at 0x7f4672bff898>,
 'l1': <mandala.autodiff.functions.linear.Linear at 0x7f4672bff940>,
 'l2': <mandala.autodiff.functions.linear.Linear at 0x7f4672bff9e8>,
 'l3': <mandala.autodiff.functions.linear.Linear at 0x7f4672bffb00>,
 'l4': <mandala.autodiff.functions.linear.Linear at 0x7f4672bffc18>,
 'l5': <mandala.autodiff.functions.linear.Linear at 0x7f4672bffd30>,
 'l6': <mandala.autodiff.functions.linear.Linear at 0x7f4672bffe48>}

In [7]:
s = time.time()
lr = 1e-4

for i in range(100):
    # make batch
    x = Variable(xp.random.random((batch_size, 5)).astype(np.float32))
    t = Variable(xp.matmul(x.data, W.T) + b)
    
    # forward
    y = model(x)
    loss = (y - t) ** 2 / batch_size

    # loss
    loss = F.sum((y - t) ** 2) / batch_size
    # backward
    model.cleargrads()
    #loss.backward()

    loss.data

    #for p in model.params.values():
    #    p.data -= lr * p.grad.data

print('time:', time.time() - s)

time: 0.7418503761291504


In [17]:
y._data.shape

(128, 3)

In [19]:
y._reference_count

0

In [10]:
!nvidia-smi

Sun Aug  5 15:18:58 2018       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 396.26                 Driver Version: 396.26                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:1E.0 Off |                    0 |
| N/A   47C    P0   103W / 149W |    166MiB / 11441MiB |     96%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|    0  

## Chainer

In [4]:
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizer

In [5]:
class Model(chainer.Chain):
    def __init__(self):
        super().__init__()
        with self.init_scope():
            self.l0 = L.Linear(   5, 1000)
            self.l1 = L.Linear(1000, 1000)
            self.l2 = L.Linear(1000, 1000)
            self.l3 = L.Linear(1000, 1000)
            self.l4 = L.Linear(1000, 1000)
            self.l5 = L.Linear(1000, 1000)
            self.l6 = L.Linear(1000,    3)

    def __call__(self, x):
        h  = F.relu(self.l0(x))
        h  = F.relu(self.l1(h))
        h  = F.relu(self.l2(h))
        h  = F.relu(self.l3(h))
        h  = F.relu(self.l4(h))
        h  = F.relu(self.l5(h))
        y  = self.l6(h)
        return y

In [6]:
model = Model()
if not xp == np:
    model.to_gpu()

In [7]:
opt = chainer.optimizers.SGD(lr=1e-4)
opt.setup(model)

<chainer.optimizers.sgd.SGD at 0x7f3da67d8e80>

In [8]:
s = time.time()

for i in range(100):
    # make batch
    x = xp.random.random((batch_size, 5)).astype(np.float32)
    t = xp.matmul(x, W.T) + b
    
    # forward
    y = model(x)

    # loss
    loss = F.mean_squared_error(y, t)

    # backward
    model.cleargrads()
    loss.backward(retain_grad=False)
    
    # update
    opt.update()

    # print(loss.data)

print(time.time() - s)

0.8809006214141846


In [9]:
!nvidia-smi

Sun Aug  5 15:17:58 2018       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 396.26                 Driver Version: 396.26                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:1E.0 Off |                    0 |
| N/A   48C    P0   123W / 149W |    146MiB / 11441MiB |     54%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|    0  

6.99s で 139MiB