In [0]:
# Switch for the Colab-only setup cells (install and Drive mount), which are guarded by `if google_colab:`.
google_colab = True

# Google Colab用事前準備

In [0]:
# install
# Colab-only setup: fetch the BinaryBrain sources and build/install its Python package in place.
if google_colab:
  !pip install pybind11
  %cd /content
  # show the CUDA compiler version available on this runtime
  !nvcc -V
  # remove any previous checkout so the clone below starts from a clean tree
  !sudo rm -fr BinaryBrain
  !rm -fr BinaryBrain
  # the ver3_develop branch is cloned together with its submodules
  !git clone --recursive -b ver3_develop https://github.com/ryuz/BinaryBrain.git
  %cd /content/BinaryBrain/python
  !sh copy_src.sh
  !python3 setup.py build
  !python3 setup.py develop

In [4]:
# mount
# Colab-only: mount Google Drive and change into the tests/python directory of a working copy stored there.
if google_colab:
  from google.colab import drive 
  drive.mount('/content/drive')
  # NOTE(review): user-specific Drive location; adjust to wherever the repository lives in your Drive.
  %cd /content/drive/My Drive/git-work/BinaryBrain_ver3_develop/tests/python

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive
/content/drive/My Drive/git-work/BinaryBrain_ver3_develop/tests/python


# メインコード

In [0]:
import binarybrain as bb
import numpy as np
import os
import sys
from tqdm import tqdm
from collections import OrderedDict

In [6]:
# load MNIST data
# `td` is indexed later in this notebook with the keys 'x_shape' and 'x_train'.
td = bb.load_mnist()

dwonload /root/.binarybrain/dataset/train-images-idx3-ubyte.gz from http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
dwonload /root/.binarybrain/dataset/train-labels-idx1-ubyte.gz from http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
dwonload /root/.binarybrain/dataset/t10k-images-idx3-ubyte.gz from http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
dwonload /root/.binarybrain/dataset/t10k-labels-idx1-ubyte.gz from http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz


## 元レイヤー

In [1]:
# Reference network: a dense-affine binary CNN whose parameters are restored
# from JSON files under `net_path`.


def _make_stage(width):
    """Create one DenseAffine -> BatchNormalization -> Binarize triple."""
    return (bb.DenseAffine.create([width]),
            bb.BatchNormalization.create(),
            bb.Binarize.create())


def _make_lowering_conv(*stage_layers):
    """Chain the given layers in a Sequential and wrap it in a 3x3 LoweringConvolution.

    Returns the (sub-network, convolution layer) pair.
    """
    sub_net = bb.Sequential.create()
    for stage_layer in stage_layers:
        sub_net.add(stage_layer)
    return sub_net, bb.LoweringConvolution.create(sub_net, 3, 3)


# create layer
layer0_affine, layer0_norm, layer0_bin = _make_stage(32)
layer1_affine, layer1_norm, layer1_bin = _make_stage(32)
layer2_affine, layer2_norm, layer2_bin = _make_stage(64)
layer3_affine, layer3_norm, layer3_bin = _make_stage(64)
layer4_affine, layer4_norm, layer4_bin = _make_stage(512)
layer5_affine, layer5_norm, layer5_bin = _make_stage(10)

# main network
cnv0_sub, layer0_cnv = _make_lowering_conv(layer0_affine, layer0_norm, layer0_bin)
cnv1_sub, layer1_cnv = _make_lowering_conv(layer1_affine, layer1_norm, layer1_bin)
cnv2_sub, layer2_cnv = _make_lowering_conv(layer2_affine, layer2_norm, layer2_bin)
cnv3_sub, layer3_cnv = _make_lowering_conv(layer3_affine, layer3_norm, layer3_bin)

main_net = bb.Sequential.create()
# two convolutions, pooling, two convolutions, pooling, then the two dense stages
main_net.add(layer0_cnv)
main_net.add(layer1_cnv)
main_net.add(bb.MaxPooling.create(2, 2))
main_net.add(layer2_cnv)
main_net.add(layer3_cnv)
main_net.add(bb.MaxPooling.create(2, 2))
for dense_layer in (layer4_affine, layer4_norm, layer4_bin,
                    layer5_affine, layer5_norm, layer5_bin):
    main_net.add(dense_layer)

main_net.set_input_shape(td['x_shape'])

# Load
net_path = 'mnist-dense-cnn-binary'

# affine parameters first, then the batch-normalization parameters
for saved_layer, json_name in (
        (layer0_affine, 'layer0_affine.json'),
        (layer1_affine, 'layer1_affine.json'),
        (layer2_affine, 'layer2_affine.json'),
        (layer3_affine, 'layer3_affine.json'),
        (layer4_affine, 'layer4_affine.json'),
        (layer5_affine, 'layer5_affine.json'),
        (layer0_norm, 'layer0_norm.json'),
        (layer1_norm, 'layer1_norm.json'),
        (layer2_norm, 'layer2_norm.json'),
        (layer3_norm, 'layer3_norm.json'),
        (layer4_norm, 'layer4_norm.json'),
        (layer5_norm, 'layer5_norm.json'),
):
    saved_layer.load_json(os.path.join(net_path, json_name))

NameError: ignored

## 蒸留先

In [0]:
# Distillation target network: same stage layout as the reference network
# (conv, conv, pool, conv, conv, pool, dense, dense), with each stage built
# from a stack of SparseLut6 layers.

# stage 0 (3x3 convolution, 32 outputs)
layer_cnv0_sl0 = bb.SparseLut6.create([192])
layer_cnv0_sl1 = bb.SparseLut6.create([32])

# stage 1 (3x3 convolution, 32 outputs) -- four-level LUT stack
layer_cnv1_sl0 = bb.SparseLut6.create([192*6])
layer_cnv1_sl1 = bb.SparseLut6.create([192])
layer_cnv1_sl2 = bb.SparseLut6.create([192])
layer_cnv1_sl3 = bb.SparseLut6.create([32])
# alternative two-level configuration, kept for reference:
#layer_cnv1_sl0 = bb.SparseLut6.create([192])
#layer_cnv1_sl1 = bb.SparseLut6.create([32])

# stage 2 (3x3 convolution, 64 outputs)
layer_cnv2_sl0 = bb.SparseLut6.create([384])
layer_cnv2_sl1 = bb.SparseLut6.create([64])

# stage 3 (3x3 convolution, 64 outputs)
layer_cnv3_sl0 = bb.SparseLut6.create([384])
layer_cnv3_sl1 = bb.SparseLut6.create([64])

# dense stages 4 (512 outputs) and 5 (10 outputs)
layer_sl4_0    = bb.SparseLut6.create([512*6])
layer_sl4_1    = bb.SparseLut6.create([512])
layer_sl5_0    = bb.SparseLut6.create([10*6*6])
layer_sl5_1    = bb.SparseLut6.create([10*6])
layer_sl5_2    = bb.SparseLut6.create([10])

# main network
target_cnv0_sub = bb.Sequential.create()
target_cnv0_sub.add(layer_cnv0_sl0)
target_cnv0_sub.add(layer_cnv0_sl1)
target_layer0_cnv = bb.LoweringConvolution.create(target_cnv0_sub, 3, 3)

target_cnv1_sub = bb.Sequential.create()
target_cnv1_sub.add(layer_cnv1_sl0)
target_cnv1_sub.add(layer_cnv1_sl1)
target_cnv1_sub.add(layer_cnv1_sl2)
target_cnv1_sub.add(layer_cnv1_sl3)
target_layer1_cnv = bb.LoweringConvolution.create(target_cnv1_sub, 3, 3)

target_cnv2_sub = bb.Sequential.create()
target_cnv2_sub.add(layer_cnv2_sl0)
target_cnv2_sub.add(layer_cnv2_sl1)
target_layer2_cnv = bb.LoweringConvolution.create(target_cnv2_sub, 3, 3)

target_cnv3_sub = bb.Sequential.create()
target_cnv3_sub.add(layer_cnv3_sl0)
target_cnv3_sub.add(layer_cnv3_sl1)
target_layer3_cnv = bb.LoweringConvolution.create(target_cnv3_sub, 3, 3)

target_layer4 = bb.Sequential.create()
target_layer4.add(layer_sl4_0)
target_layer4.add(layer_sl4_1)

target_layer5 = bb.Sequential.create()
target_layer5.add(layer_sl5_0)
target_layer5.add(layer_sl5_1)
target_layer5.add(layer_sl5_2)

target_net = bb.Sequential.create()
target_net.add(target_layer0_cnv)
target_net.add(target_layer1_cnv)
target_net.add(bb.MaxPooling.create(2, 2))
# Fixed: the second convolution pair must be stages 2 and 3. Previously stages
# 0 and 1 were added a second time and target_layer2_cnv / target_layer3_cnv
# were never used.
target_net.add(target_layer2_cnv)
target_net.add(target_layer3_cnv)
target_net.add(bb.MaxPooling.create(2, 2))
target_net.add(target_layer4)
target_net.add(target_layer5)

target_net.set_input_shape(td['x_shape'])

[10]

In [0]:
# Pull the weight tensor of the reference layer-1 affine and view it as a numpy
# array shaped by the tensor's reported shape in reversed order.
# NOTE(review): presumably rows are output nodes and columns are inputs -- confirm against DenseAffine.W().
tW = layer1_affine.W()
W = np.array(tW.get_data()).reshape(tW.get_shape()[::-1])
# Per row: column indices ordered by decreasing absolute weight.
idx = np.argsort(-np.abs(W), axis=1)

In [0]:
# Disabled experiment (the `if False:` guard means this never runs): hand-wire
# the connections of layer_cnv1_sl1 and layer_cnv1_sl0.
if False:
    # sl1: node i, input j <- index i*6+j (six consecutive indices per node)
    for i in range(32):
        for j in range(6):
            layer_cnv1_sl1.set_connection_index([i], j, i*6+j)

    # sl0: connections taken from `idx`, i.e. the largest-magnitude weights first
    # NOTE(review): j runs up to 35 here; confirm the intended node/input indexing
    # for a SparseLut6 layer before re-enabling.
    for i in range(32):
        for j in range(6*6):
            layer_cnv1_sl0.set_connection_index([i], j, idx[i][j])

In [0]:
# Disabled experiment (the `if False:` guard means this never runs): hand-wire
# the connections of the four-level layer-1 stack (sl3, sl2, sl1, sl0).
# NOTE(review): the inner ranges (36 and 216) exceed 6; confirm the intended
# node/input indexing for SparseLut6 before re-enabling.
if False:
    for i in range(32):
        for j in range(6):
            layer_cnv1_sl3.set_connection_index([i], j, i*6+j)
    
    for i in range(32):
        for j in range(6*6):
            layer_cnv1_sl2.set_connection_index([i], j, i*6*6+j)
            
    for i in range(32):
        for j in range(6*6):
            layer_cnv1_sl1.set_connection_index([i], j, i*6*6+j)

    # sl0 draws its indices from `idx`, wrapping around with the modulo when
    # j exceeds the number of entries in the row
    for i in range(32):
        for j in range(6*6*6):
#            if j < len(idx[i]):
            layer_cnv1_sl0.set_connection_index([i], j, idx[i][j % len(idx[i])])

In [0]:
# Same extraction as in the earlier cell: weights of the reference layer-1
# affine, reshaped to the tensor's reported shape in reversed order.
tW = layer1_affine.W()
W = np.array(tW.get_data()).reshape(tW.get_shape()[::-1])

In [0]:
# Per row of W: column indices ordered by decreasing absolute weight.
idx = np.argsort(-np.abs(W), axis=1)

In [0]:
def layer_distillation(x, x_shape, target_net, ref_net, pre_net, epochs=8, max_batch_size=32):
    """Train ``target_net`` to reproduce the output of ``ref_net``.

    Each mini-batch of ``x`` is passed through ``pre_net`` (not trained); the
    result is fed to both ``ref_net`` (inference mode) and ``target_net``
    (training mode). ``target_net`` is updated with Adam on the mean squared
    error between the two outputs.

    Args:
        x: training samples; sliced as ``x[index:index + n]`` and measured with ``len``.
        x_shape: node shape of one sample, used for ``pre_net``'s input shape
            and for sizing the input frame buffer.
        target_net: network being trained. Its input shape is set to
            ``ref_net``'s input shape and it is sent the ``"binary true"`` command.
        ref_net: network whose output is the regression target.
        pre_net: network applied to the raw input before ``ref_net``/``target_net``.
        epochs: number of passes over ``x`` (default 8, the previous fixed value).
        max_batch_size: upper bound on samples per mini-batch (default 32, the
            previous fixed value).

    Returns:
        None. Progress and the running loss are shown with a tqdm bar per epoch.
    """
    print(ref_net.get_input_shape())

    target_net.set_input_shape(ref_net.get_input_shape())
    target_net.send_command("binary true")

    pre_net.set_input_shape(x_shape)

    batch_size = len(x)

    loss = bb.LossMeanSquaredError.create()
    optimizer = bb.OptimizerAdam.create()
    optimizer.set_variables(target_net.get_parameters(), target_net.get_gradients())

    # Dedicated input buffer. It is kept separate from pre_net's output so the
    # resize/set_data of the next iteration never touches a buffer returned by
    # pre_net.forward().
    x_buf = bb.FrameBuffer()

    for _ in range(epochs):
        # the loss accumulator is reset per epoch, so the bar shows the epoch's running loss
        loss.clear()
        with tqdm(range(0, batch_size, max_batch_size), leave=True) as pbar:
            for index in pbar:
                # the last mini-batch may be smaller than max_batch_size
                mini_batch_size = min(max_batch_size, batch_size - index)

                # setup x
                x_buf.resize(mini_batch_size, x_shape)
                x_buf.set_data(x[index:index + mini_batch_size])

                # forward: shared pre-processing, then reference (inference)
                # and target (training) on the same features
                feat_buf = pre_net.forward(x_buf, False)
                t_buf = ref_net.forward(feat_buf, False)
                y_buf = target_net.forward(feat_buf, True)

                # calc loss
                dy_buf = loss.calculate_loss(y_buf, t_buf, mini_batch_size)

                # backward
                target_net.backward(dy_buf)

                # update
                optimizer.update()

                # print progress
                progress = OrderedDict()
                progress['loss'] = loss.get_loss()
                pbar.set_postfix(progress)

In [0]:
# Pre-processing network placed in front of the layer being distilled; it
# starts with a RealToBinary layer.
# NOTE(review): the arguments (4, framewise=True) presumably control frame multiplexing -- confirm in the BinaryBrain docs.
pre_net = bb.Sequential.create()
pre_net.add(bb.RealToBinary.create(4, framewise=True))

In [0]:
# Append the reference layer-0 convolution so pre_net's output is the input of layer 1.
# Not idempotent: re-running this cell appends layer0_cnv again.
pre_net.add(layer0_cnv)

In [0]:
# Distil the reference layer-1 convolution into its LUT-based counterpart.
# Long-running: the recorded run took roughly 28-32 minutes per epoch.
layer_distillation(td['x_train'], td['x_shape'], target_layer1_cnv, layer1_cnv, pre_net)

[26, 26, 32]


100%|██████████████████████████████████████████████████████████████████| 1875/1875 [28:11<00:00,  1.11it/s, loss=0.283]
100%|██████████████████████████████████████████████████████████████████| 1875/1875 [27:51<00:00,  1.12it/s, loss=0.251]
100%|██████████████████████████████████████████████████████████████████| 1875/1875 [28:34<00:00,  1.09it/s, loss=0.234]
100%|██████████████████████████████████████████████████████████████████| 1875/1875 [30:08<00:00,  1.04it/s, loss=0.234]
100%|██████████████████████████████████████████████████████████████████| 1875/1875 [30:27<00:00,  1.03it/s, loss=0.223]
100%|██████████████████████████████████████████████████████████████████| 1875/1875 [32:11<00:00,  1.03s/it, loss=0.217]
100%|██████████████████████████████████████████████████████████████████| 1875/1875 [31:37<00:00,  1.01s/it, loss=0.215]
100%|██████████████████████████████████████████████████████████████████| 1875/1875 [29:50<00:00,  1.05it/s, loss=0.215]


In [0]:
#layer_cnv1_sl0.save_json(os.path.join(net_path, 'layer1_cnv_sl0.json'))
#layer_cnv1_sl1.save_json(os.path.join(net_path, 'layer1_cnv_sl1.json'))

In [0]:
# Re-create the layer-1 LUT layers in the two-level (192 -> 32) configuration.
# This only rebinds the names: target_cnv1_sub still holds the objects that
# were added to it when the target network was built.
layer_cnv1_sl0 = bb.SparseLut6.create([192])
layer_cnv1_sl1 = bb.SparseLut6.create([32])


In [0]:
# Give the second-level layer an input width of 192, matching layer_cnv1_sl0's output size.
# NOTE(review): presumably required before connection indices can be assigned -- confirm.
layer_cnv1_sl1.set_input_shape([192])
#layer_cnv1_sl1.set_connection_index([0], 0, 0)

[32]

In [0]:
# Wire layer_cnv1_sl1 so that node i, input j takes index i*6+j, i.e. each of
# the 32 nodes gets six consecutive indices.
for flat_index in range(32 * 6):
    i, j = divmod(flat_index, 6)
    layer_cnv1_sl1.set_connection_index([i], j, flat_index)

In [0]:


#layer_cnv1_sl1.get_connection([0], 0)
# Display the output shape of the re-created second-level LUT layer.
layer_cnv1_sl1.get_output_shape()

[32]

In [0]:
# Inspect row 0 of the weight matrix: rank its columns by |weight| and show the
# nine strongest entries. Note that `idx` is rebound to a 1-D array here.
M = W.copy()
row0 = M[0]
idx = np.argsort(-np.abs(row0))
print(idx)
for i in range(9):
    col = idx[i]
    print('%d  %f' % (col, row0[col]))

[213 123  92 259 209  96 164 119  93 267 211 210 268 200 178 186 263  41
 258 166 158 194 262 122 214 169 205  65 128  88  14  38  10   1 274  71
   2 208 182  13  68  12 174 257  85 245   5 224 172  48  11 106  25 272
  94 188 133 202  91 266 283  95 124 173  17 286 168  66 150 207 256 121
  83 144 163 192 254 270 218 279  78  18 155  59 250  46 221  77 101 277
 278 180 102 114 170 130 161  15 189  30 108 285  27 143 177 255  16 216
 191 265 249  44 142  62 134 271 287  55 231 225 109  63  89 273  34 201
  74   7  58  79  51 243 131  36 244 107 233 118 137 230  49  29 104 195
 234 212 167 240  64 241  40  72 217 165  97  61  42 129  75 219 228  50
 149 147 281 227 113  67 215 148 136 115   6 220 239  90  19 196 247 223
 199 120 159 206 280 248 246 242  98  31 154 275 193 269 179 253   9 140
  73 190  24  87 126 236 117 156 264  86 197 203 141 276 145 198  37  53
 184 153 260  28 181  21 157 139  39   3 183 252  35 282  76 116  70 146
 100 151  56 162 237 232 110 127 229  23  26 175 17

In [0]:
def get_sortarg(M):
    """Return, for each row of ``M``, the indices that sort it by decreasing magnitude.

    Args:
        M: array whose first axis is iterated row by row.

    Returns:
        Integer array with the same shape as ``M``; row ``r`` holds the argsort
        of ``-abs(M[r])``.
    """
    order = np.zeros_like(M, dtype=int)
    for row, values in enumerate(M):
        order[row] = np.argsort(-np.abs(values))
    return order

# Rebuild `idx` as the full 2-D table of per-row column rankings by decreasing |weight|.
idx = get_sortarg(W)

In [0]:
# Recompute the per-row rankings (the same call as in the previous cell).
idx = get_sortarg(W)

In [0]:
# Show the 64 largest-magnitude weights of row 0 as "<column> <weight>" lines.
for i in range(64):
    col = idx[0][i]
    print("%d %f" % (col, W[0][col]))


213 0.625557
123 0.612855
92 0.513268
259 -0.431354
209 0.425770
96 0.417591
164 -0.393381
119 0.389183
93 0.363127
267 0.346345
211 0.341455
210 0.329749
268 0.324445
200 0.323485
178 0.321366
186 -0.320791
263 0.319501
41 0.312180
258 -0.304235
166 -0.296559
158 0.289072
194 -0.288853
262 -0.287436
122 0.280644
214 0.279326
169 0.278695
205 0.278291
65 -0.273646
128 0.270545
88 0.268212
14 0.258259
38 0.251168
10 0.251001
1 0.250480
274 0.247326
71 -0.244706
2 0.244142
208 0.238619
182 -0.236117
13 0.232247
68 -0.231580
12 0.229702
174 0.229211
257 -0.226741
85 -0.226691
245 0.225787
5 0.223860
224 -0.221093
172 -0.213886
48 -0.212861
11 0.212523
106 -0.212507
25 -0.211504
272 0.209848
94 0.209082
188 0.206546
133 0.206496
202 0.204975
91 0.204754
266 0.199720
283 0.198510
95 0.198481
124 0.193853
173 -0.193404
