In [1]:
import numpy as np
import cffi

# Probe for a PYNQ board: importing pynq, downloading the base overlay and
# importing the xlnk driver must all succeed for IS_PYNQ to be True.
# Any ordinary failure (missing package, missing bitstream, driver error)
# falls back to host-only mode.
try:
    from pynq import Overlay

    # load Base Overlay
    Overlay("/home/xilinx/pynq/bitstream/base.bit").download()

    from pynq.drivers import xlnk
    IS_PYNQ = True
except Exception:
    # Deliberately best-effort, but `Exception` (not a bare `except`) so that
    # KeyboardInterrupt / SystemExit still propagate, e.g. when the user
    # interrupts a hanging bitstream download.
    IS_PYNQ = False
    
import chainer
from chainer import links as L
from chainer import functions as F
from chainer import Variable

import sys
sys.path.append("..")

from pynq_chainer import functions as F_
from pynq_chainer import links as L_
from pynq_chainer import utils

%matplotlib inline
import matplotlib.pyplot as plt
import sys

In [2]:
# Show whether the PYNQ overlay/driver probe in the first cell succeeded.
IS_PYNQ

False

In [3]:
# Load MNIST through chainer and take the first training sample
# (image vector plus its label) as the test input for this notebook.
train, test = chainer.datasets.get_mnist()

x, label = train[0]

# 28x28 view of the same sample.
img = x.reshape((28, 28))

In [4]:
# Prepend a batch axis, force float32, and wrap the sample as a chainer Variable.
input_image = chainer.Variable(x[np.newaxis, :].astype(np.float32))

# CPU 

In [5]:
class MLP(chainer.Chain):
    """Three-layer perceptron built from stock chainer ``Linear`` links.

    Args:
        n_units: Output width of each of the two hidden layers.
        n_out: Output width of the final layer.
    """

    def __init__(self, n_units, n_out):
        # Input sizes are passed as None, so each layer infers its own
        # input size instead of having it spelled out here.
        layers = dict(
            l1=L.Linear(None, n_units),  # n_in -> n_units
            l2=L.Linear(None, n_units),  # n_units -> n_units
            l3=L.Linear(None, n_out),  # n_units -> n_out
        )
        super(MLP, self).__init__(**layers)

    def __call__(self, x):
        """Return the raw (pre-softmax) output of the last layer for ``x``."""
        hidden = x
        # Both hidden layers are followed by ReLU; the output layer is not.
        for layer in (self.l1, self.l2):
            hidden = F.relu(layer(hidden))
        return self.l3(hidden)

In [6]:
# Build the CPU network and restore the trained snapshot into it.
resume = "../examples/mnist/mnist_iter_12000.npz"
mlp_cpu = MLP(n_units=32, n_out=10)
chainer.serializers.load_npz(resume, mlp_cpu)

# Softmax over the network output, then the index of the largest entry.
h = F.softmax(mlp_cpu(input_image))
y = F.argmax(h)

print(h.data)
print("result", y.data)
# "seikai" is Japanese for "correct answer": the label of the sample.
print("seikai", label)

[[  1.72932252e-13   1.29591280e-08   4.09194492e-08   2.45058211e-03
    3.96321289e-23   9.97549355e-01   1.99582668e-15   1.14123099e-12
    1.81230933e-11   1.20257918e-11]]
result 5
seikai 5


# FPGA

In [7]:
class MLP_FPGA(chainer.Chain):
    """Three-layer perceptron built from the ``pynq_chainer`` ``Linear`` links.

    Same layer layout as ``MLP`` (l1 -> ReLU -> l2 -> ReLU -> l3), but every
    layer is a ``pynq_chainer.links.Linear`` created with ``nobias=True``.

    NOTE(review): the CPU ``MLP`` uses chainer's ``Linear`` without
    ``nobias``; if the loaded snapshot contains bias terms they are
    presumably not applied here -- confirm this is intended.

    Args:
        n_units: Output width of each of the two hidden layers.
        n_out: Output width of the final layer.
    """

    def __init__(self, n_units, n_out):
        super(MLP_FPGA, self).__init__(
            # the size of the inputs to each layer will be inferred
            l1=L_.Linear(None, n_units, nobias=True),  # n_in -> n_units
            l2=L_.Linear(None, n_units, nobias=True),  # n_units -> n_units
            l3=L_.Linear(None, n_out, nobias=True),  # n_units -> n_out
        )

    def __call__(self, x):
        """Run the forward pass on ``x`` and return the last layer's output.

        Prints progress markers plus the shape and contents of every
        intermediate activation as a debugging aid; the ReLU between layers
        is the stock chainer function.
        """
        h = self.l1(x)
        print(h)
        h1 = F.relu(h)
        print("h1 done")
        h2 = F.relu(self.l2(h1))
        print("h2 done")
        h3 = self.l3(h2)
        print("h3 done")
        # Dump shapes/values only after all three layers have run, so the
        # progress markers above are not interleaved with array output.
        print(x.shape)
        print(h1.shape)
        print(h1.data)
        print(h2.shape)
        print(h2.data)
        print(h3.shape)
        print(h3.data)
        print("call done")
        return h3

In [8]:
# Build the pynq_chainer-based network and restore the same snapshot file
# that the CPU model was loaded from.
resume = "../examples/mnist/mnist_iter_12000.npz"
mlp_fpga = MLP_FPGA(n_units=32, n_out=10)
chainer.serializers.load_npz(resume, mlp_fpga)

cma alloc (dummy)
init model cma array W <cdata 'char[]' buffer len 100352 from 'memoryview' object> W
cma alloc (dummy)
init model cma array W <cdata 'char[]' buffer len 4096 from 'memoryview' object> W
cma alloc (dummy)
init model cma array W <cdata 'char[]' buffer len 1280 from 'memoryview' object> W


In [11]:
# Run the sample through the pynq_chainer-based network, then apply the
# stock chainer softmax/argmax to its output.
h = F.softmax(mlp_fpga(input_image))
print(h.data)
y = F.argmax(h)
print("result", y.data)
# "seikai" is Japanese for "correct answer": the label of the sample.
print("seikai", label)

links <class 'chainer.variable.Variable'> <class 'chainer.variable.Variable'>
cma alloc (dummy)
links <class 'chainer.variable.Variable'> <class 'chainer.variable.Variable'>
cma alloc (dummy)
<var@1068a2470>
h1 done
links <class 'chainer.variable.Variable'> <class 'chainer.variable.Variable'>
cma alloc (dummy)
links <class 'chainer.variable.Variable'> <class 'chainer.variable.Variable'>
cma alloc (dummy)
h2 done
links <class 'chainer.variable.Variable'> <class 'chainer.variable.Variable'>
cma alloc (dummy)
links <class 'chainer.variable.Variable'> <class 'chainer.variable.Variable'>
cma alloc (dummy)
h3 done
(1, 784)
(1, 32)
[[ 3.05095577  0.          2.55680013  4.07168865  1.99023378  1.09072948
   0.          4.49700737  5.57241726  0.          0.          0.
   3.18601227  0.          0.          0.          0.          2.0017643
   0.73404318  3.90470552  3.48775458  3.08954358  0.          0.          0.
   3.60635543  3.32248259  0.          3.87468791  6.82556391  0.
   4.46334

In [None]:
[[ 3.05095577 -2.61691952  2.55680013  4.07168865  1.99023378  1.09072948
  -1.82831872  4.49700737  5.57241726 -4.70780516 -2.25808644 -4.83468103
   3.18601227 -3.28089046 -0.62773103 -0.6149773  -0.7455532   2.0017643
   0.73404318  3.90470552  3.48775458  3.08954358 -0.47251615 -0.90851831
  -0.327535    3.60635543  3.32248259 -3.12803578  3.87468791  6.82556391
  -0.1521184   4.46334553]]
cma alloc

In [None]:
# Display the weight array of the first layer of the pynq_chainer-based model.
mlp_fpga.l1.W.data

# Benchmark 

In [None]:
 # Time the CPU model's forward pass (2 loops per run; -o returns the result object).
 %timeit -n 2 -o mlp_cpu(input_image)

In [None]:
 %timeit -n 2 -o mlp_fpga(in_fpga)

In [None]:
# Reset the xlnk memory manager once the benchmarks are done.
# `memmanager` was never created in this notebook, and the `xlnk` module is
# only imported when the PYNQ probe in the first cell succeeded, so build the
# manager here and skip the reset entirely on non-PYNQ hosts.
# NOTE(review): assumes the PYNQ v1.x layout where `pynq.drivers.xlnk`
# exposes the `xlnk` class -- confirm against the installed image.
if IS_PYNQ:
    memmanager = xlnk.xlnk()
    memmanager.xlnk_reset()