In [1]:
from sklearn.utils import shuffle
# from sklearn.datasets import fetch_mldata
from sklearn.datasets import fetch_openml
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import numpy as np
import nngen as ng
from pathlib import Path
print(ng.__version__)

1.3.40


In [2]:
def sigmoid(x):
    """Logistic sigmoid, evaluated through tanh.

    Uses the identity sigmoid(x) = 0.5 * tanh(x / 2) + 0.5, which avoids
    calling exp() on large-magnitude inputs.
    """
    half_x = x * 0.5
    half_tanh = np.tanh(half_x) * 0.5
    return half_tanh + 0.5

def deriv_sigmoid(x):
    """Derivative of the logistic sigmoid: s(x) * (1 - s(x))."""
    s = sigmoid(x)
    return s * (1 - s)

def relu(x):
    """Rectified linear unit: element-wise max(x, 0)."""
    floor = 0
    return np.maximum(x, floor)

def deriv_relu(x):
    """Derivative of ReLU: 1 where x > 0, 0 elsewhere, in the dtype of ``x``."""
    is_positive = np.greater(x, 0)
    return is_positive.astype(x.dtype)

def tanh(x):
    """Hyperbolic tangent; a thin wrapper around ``np.tanh``."""
    activated = np.tanh(x)
    return activated

def deriv_tanh(x):
    """Derivative of tanh: 1 - tanh(x)^2."""
    t = np.tanh(x)
    return 1 - t**2

# Keep the argument of log away from 0 (and below) by clipping it at 1e-8.
def np_log(x):
    """Natural log of ``x`` after clipping ``x`` from below at 1e-8."""
    clipped = np.clip(x, 1e-8, None)
    return np.log(clipped)


def softmax(x):
    """Row-wise softmax of a 2-D array.

    Args:
        x: array whose axis 1 holds the class scores of one sample.

    Returns:
        A new array of the same shape whose rows sum to 1.

    The per-row maximum is subtracted before exponentiating for numerical
    stability. The shift is computed out of place so the caller's array is
    left untouched (an in-place ``x -= ...`` would silently overwrite the
    caller's logits).
    """
    shifted = x - x.max(axis=1, keepdims=True)
    x_exp = np.exp(shifted)
    return x_exp / np.sum(x_exp, axis=1, keepdims=True)

def deriv_softmax(x):
    """Element-wise p * (1 - p), where p = softmax(x)."""
    probs = softmax(x)
    return probs * (1 - probs)

def identity(x):
    """Identity activation: return ``x`` sliced over its full first axis.

    For a NumPy array this is a view of the same data; for a list it is a
    shallow copy.
    """
    everything = slice(None)
    return x[everything]

In [3]:
# Load MNIST, scale pixels to [0, 1], one-hot encode the labels and split into
# train / validation / test sets.
# mnist = fetch_mldata('MNIST original')
mnist = fetch_openml('mnist_784')

x_mnist = mnist.data.to_numpy().astype('float32') / 255.
# One-hot targets: row i of the 10x10 identity matrix encodes digit i.
t_mnist = np.eye(10)[mnist.target.to_numpy().astype('int32')]
print(x_mnist.shape)

# Fixed seed so that a kernel restart reproduces exactly the same split.
SPLIT_SEED = 0

# NOTE(review): test_size=50000 followed by test_size=10000 leaves only 10000
# of the 70000 samples for training (see the printed shape below). Confirm this
# is intentional rather than a swapped train/test size.
x_train_mnist, x_test_mnist, t_train_mnist, t_test_mnist = train_test_split(
    x_mnist, t_mnist, test_size=50000, random_state=SPLIT_SEED)
x_train_mnist, x_valid_mnist, t_train_mnist, t_valid_mnist = train_test_split(
    x_train_mnist, t_train_mnist, test_size=10000, random_state=SPLIT_SEED)
print(x_train_mnist.shape)

  warn(


(70000, 784)
(10000, 784)


In [4]:
# Quantized data types shared by every NNgen object built in this notebook.
act_dtype = ng.int8
weight_dtype = ng.int8
bias_dtype = ng.int32
scale_dtype = ng.int8
batchsize = 1

# Scale applied to float activations before they are fed to NNgen:
# 128 for activation types wider than 8 bits, otherwise half of
# 2 ** (width - 1) (i.e. 64 when the width is 8).
act_scale_factor = (
    128 if act_dtype.width > 8
    else int(round(2 ** (act_dtype.width - 1) * 0.5))
)

# Per-placeholder statistics, keyed by placeholder name; mean and std of the
# training images are expressed in the scaled (quantized) domain.
input_scale_factors = dict(l0=act_scale_factor)
input_means = dict(l0=x_train_mnist.mean() * act_scale_factor)
input_stds = dict(l0=x_train_mnist.std() * act_scale_factor)

class Matmul:
    """Fully connected layer with a NumPy float model and a matching NNgen operator.

    The NumPy side (``W``, ``b``) is used for forward/backward passes and
    gradient-descent updates. The NNgen side (``W_ng``, ``b_ng``, ``s_ng``,
    ``out_ng``) describes the same layer using the module-level quantized
    dtypes. Layers are chained through ``input_node``: each public method
    delegates to the input node as well, so calling it on the last layer walks
    the whole chain.
    """

    def __init__(self, input_node, in_dim, out_dim, name, act=True):
        """Create the float parameters and the NNgen operator of the layer.

        Args:
            input_node: preceding node; must expose ``out_ng`` and the same
                forward/backward/update/sync/save/load methods as this class.
            in_dim: number of input features.
            out_dim: number of output features.
            name: prefix of the ``.npy`` files written by ``save_params_np``.
            act: if True the layer applies ReLU, otherwise no activation.
        """
        self.name = name
        # Float weights are stored as (in_dim, out_dim) so that forward is x @ W.
        self.W = np.random.uniform(low=-0.08, high=0.08,
                                   size=(in_dim, out_dim)).astype('float32')
        self.b = np.zeros(out_dim).astype('float32')
        if act:
            self.act = relu
            self.deriv_act = deriv_relu
        else:
            self.act = identity
            # The derivative of the identity is 1 everywhere. Using the
            # identity itself here would scale the back-propagated delta by u.
            self.deriv_act = self._deriv_identity

        # Cached by the forward pass for use in the backward pass.
        self.x = None
        self.u = None
        self.delta = None

        # Gradient accumulators; cleared by update_params().
        self.dW = 0
        self.db = 0

        self.input_node = input_node

        # NNgen keeps the weight as (out_dim, in_dim), hence transposed_b=True.
        self.W_ng = ng.variable(dtype=weight_dtype, shape=(out_dim, in_dim))
        self.b_ng = ng.variable(dtype=bias_dtype, shape=(out_dim,))
        self.s_ng = ng.variable(dtype=scale_dtype, shape=(out_dim,))
        self.out_ng = ng.matmul(self.input_node.out_ng, self.W_ng,
            bias=self.b_ng, scale=self.s_ng, transposed_b=True,
            act_func=ng.relu if act else None, dtype=act_dtype, sum_dtype=bias_dtype)

    @staticmethod
    def _deriv_identity(x):
        """Derivative of the identity activation: ones shaped like ``x``."""
        return np.ones_like(x)

    def reset_ng(self):
        """Re-run the constructors of the existing NNgen objects in place.

        The objects keep their Python identity, so references held by other
        nodes stay valid.
        NOTE(review): presumably this clears values/quantization state left by
        an earlier sync; confirm against NNgen. Attributes assigned after
        construction (e.g. via ``out_ng.attribute``) may need to be re-applied.
        """
        out_dim, in_dim = self.W_ng.shape
        # Read the current activation before out_ng is re-initialised.
        act_func = ng.relu if self.out_ng.act_func else None
        self.W_ng.__init__(dtype=weight_dtype, shape=(out_dim, in_dim))
        self.b_ng.__init__(dtype=bias_dtype, shape=(out_dim,))
        self.s_ng.__init__(dtype=scale_dtype, shape=(out_dim,))
        self.out_ng.__init__(self.input_node.out_ng, self.W_ng,
            bias=self.b_ng, scale=self.s_ng, transposed_b=True,
            act_func=act_func, dtype=act_dtype, sum_dtype=bias_dtype)

    def forward_np(self, feed_dict):
        """Float forward pass: returns ``act(x @ W + b)`` and caches x and u."""
        self.x = self.input_node.forward_np(feed_dict)
        self.u = np.matmul(self.x, self.W) + self.b
        return self.act(self.u)

    def _accumulate_delta(self, delta, W):
        """Compute this layer's delta and add its gradients to dW / db.

        Args:
            delta: error signal of the following layer, or of this layer's
                own output when ``W`` is None.
            W: float weight of the following layer, or None for the output
                layer.
        """
        if W is None:  # output layer
            self.delta = delta
        else:
            self.delta = self.deriv_act(self.u) * np.matmul(delta, W.T)
        self.compute_grad()

    def backward_np(self, delta, W=None):
        """Back-propagate through the chain after a ``forward_np`` call."""
        self._accumulate_delta(delta, W)
        self.input_node.backward_np(self.delta, self.W)

    def compute_grad(self):
        """Add the batch-averaged gradients of the current delta to dW and db."""
        batch_size = self.delta.shape[0]
        self.dW += np.matmul(self.x.T, self.delta) / batch_size
        # ones(batch) @ delta sums delta over the batch axis.
        self.db += np.matmul(np.ones(batch_size), self.delta) / batch_size

    def update_params(self, alpha):
        """Apply one gradient-descent step with learning rate ``alpha``.

        Clears the gradient accumulators and then updates the input node.
        """
        self.W -= alpha * self.dW
        self.b -= alpha * self.db
        self.dW = 0
        self.db = 0
        self.input_node.update_params(alpha)

    def sync_params(self):
        """Copy the float parameters into the (re-initialised) NNgen variables.

        The weight is transposed to NNgen's (out_dim, in_dim) layout and the
        scale is set to all ones.
        NOTE(review): float values are handed to integer-typed variables here;
        presumably quantization happens in a later step. Confirm.
        """
        self.input_node.sync_params()
        self.reset_ng()
        self.W_ng.set_value(self.W.T)
        self.b_ng.set_value(self.b)
        self.s_ng.set_value(np.ones(self.s_ng.shape))

    def forward_ng(self, feed_dict):
        """Forward pass through NNgen's software evaluation.

        Returns the evaluated output of ``out_ng`` divided by its
        ``scale_factor`` and caches it in ``self.u``.
        """
        self.x = self.input_node.forward_ng(feed_dict)
        # Strictly speaking this is wrong: u should be the pre-activation
        # value, while out_ng already includes the activation. It is fine as
        # long as u is only passed to deriv_relu in the backward pass, which
        # only looks at the sign.
        self.u = ng.eval([self.out_ng], **feed_dict)[0].astype(float) / self.out_ng.scale_factor
        return self.u

    def backward_ng(self, delta, W=None):
        """Back-propagate through the chain after a ``forward_ng`` call."""
        self._accumulate_delta(delta, W)
        self.input_node.backward_ng(self.delta, self.W)

    def save_params_np(self, path):
        """Save W and b of the whole chain as ``<name>_w.npy`` / ``<name>_b.npy`` under ``path``."""
        self.input_node.save_params_np(path)
        np.save(Path(path) / (self.name + "_w.npy"), self.W)
        np.save(Path(path) / (self.name + "_b.npy"), self.b)

    def load_params_np(self, path):
        """Load W and b of the whole chain from the files written by ``save_params_np``."""
        self.input_node.load_params_np(path)
        self.W = np.load(Path(path) / (self.name + "_w.npy"))
        self.b = np.load(Path(path) / (self.name + "_b.npy"))

class PlaceHolder:
    """Input node of a layer chain.

    Wraps an NNgen placeholder and ends the delegation of every chained
    method: it has no parameters, so backward, update, sync, save and load do
    nothing.
    """

    def __init__(self, ch, name):
        """Create a (batchsize, ch) placeholder that is fed under ``name``."""
        self.name = name
        self.out_ng = ng.placeholder(name=name, shape=(batchsize, ch), dtype=act_dtype)

    # --- forward passes -------------------------------------------------
    def forward_np(self, feed_dict):
        """Return the value fed for this placeholder, unchanged."""
        fed_value = feed_dict[self.name]
        return fed_value

    def forward_ng(self, feed_dict):
        """Return the fed value divided by the module-level ``act_scale_factor``."""
        fed_value = feed_dict[self.name]
        return fed_value / act_scale_factor

    # --- no-ops: a placeholder owns no parameters -------------------------
    def backward_np(self, delta, W):
        return None

    def backward_ng(self, delta, W):
        return None

    def update_params(self, alpha):
        return None

    def sync_params(self):
        return None

    def save_params_np(self, path):
        return None

    def load_params_np(self, path):
        return None

In [5]:
# 784 -> 100 -> 100 -> 10 multilayer perceptron; only the last layer has no ReLU.
l0 = PlaceHolder(ch=784, name="l0")
l1 = Matmul(input_node=l0, in_dim=784, out_dim=100, name="l1")
l2 = Matmul(input_node=l1, in_dim=100, out_dim=100, name="l2")
l3 = Matmul(input_node=l2, in_dim=100, out_dim=10, name="l3", act=False)

In [6]:
# --------------------
# (3) Assign hardware attributes
# --------------------

par_ich = 2
par_och = 2
axi_datawidth = 32

# Every matmul operator gets the same par_ich / par_och attributes.
for _layer in (l1, l2, l3):
    _layer.out_ng.attribute(par_ich=par_ich, par_och=par_och)

# --------------------
# (5) Convert the NNgen dataflow to a hardware description (Verilog HDL and IP-XACT)
# --------------------

# to Veriloggen object
# targ = ng.to_veriloggen([output_layer], 'mlp', silent=silent,
#                        config={'maxi_datawidth': axi_datawidth})

# to IP-XACT (the method returns Veriloggen object, as well as to_veriloggen)
targ = ng.to_ipxact(
    [l3.out_ng],
    'mlp',
    config=dict(maxi_datawidth=axi_datawidth),
)

# to Verilog HDL RTL (the method returns a source code text)
# rtl = ng.to_verilog([output_layer], 'mlp', silent=silent,
#                    config={'maxi_datawidth': axi_datawidth})


NNgen: Neural Network Accelerator Generator (version 1.3.40)
[IP-XACT]
  Output: mlp
[Configuration]
(AXI Master Interface)
  Data width   : 32
  Address width: 32
(AXI Slave Interface)
  Data width   : 32
  Address width: 32
[Schedule Table]
(Stage 0)
(Stage 1)
  <matmul None dtype:int8 shape:(1, 100) bias:(100,) scale:(100,) act_func:relu sum_dtype:int32 par_left_col:2 par_out_col:2 concur_out_col:4 stationary:right keep_left default_addr:91648 g_index:0 l_index:1 word_alignment:4 aligned_shape:(1, 100) scale_factor:1.000000>
  | <placeholder l0 dtype:int8 shape:(1, 784) default_addr:64 g_index:2 word_alignment:4 aligned_shape:(1, 784) scale_factor:1.000000>
  | <variable input_0 dtype:int8 shape:(100, 784) default_addr:896 g_index:3 word_alignment:4 aligned_shape:(100, 784) scale_factor:1.000000>
  | <variable input_1 dtype:int32 shape:(100,) default_addr:896 g_index:3 word_alignment:2 aligned_shape:(100,) scale_factor:1.000000>
  | <variable input_2 dtype:int8 shape:(100,) default_