In [1]:
# Set to True when running on Google Colab; it gates the install and Drive-mount cells.
google_colab = False

# Google Colab用事前準備

In [2]:
# install
if google_colab:
  !pip install pybind11
  %cd /content
  !nvcc -V
  !sudo rm -fr BinaryBrain
  !rm -fr BinaryBrain
  !git clone --recursive -b ver3_develop https://github.com/ryuz/BinaryBrain.git
  %cd /content/BinaryBrain/python
  !sh copy_src.sh
  !python3 setup.py build
  !python3 setup.py develop

  import binarybrain as bb

In [3]:
import binarybrain as bb
import importlib

# Re-import binarybrain in the running kernel (presumably to pick up a rebuilt module).
# Assigning the result keeps the cell from echoing a module repr, which would
# embed a local filesystem path in the saved notebook output.
bb = importlib.reload(bb)

<module 'importlib' from 'c:\\users\\ryuji\\appdata\\local\\programs\\python\\python37\\lib\\importlib\\__init__.py'>

In [4]:
# Mount Google Drive and change into the working directory (runs only when google_colab is True).
if google_colab:
  from google.colab import drive 
  drive.mount('/content/drive')
  %cd /content/drive/My Drive/git-work/BinaryBrain_ver3_develop/tests/python

In [5]:
import binarybrain as bb
# Fetch the device properties reported by BinaryBrain and print them.
prop = bb.get_device_properties()
print(prop)


name                     : GeForce GT 1030
totalGlobalMem           : 2147483648
sharedMemPerBlock        : 49152
regsPerBlock             : 65536
warpSize                 : 32
memPitch                 : 2147483647
maxThreadsPerBlock       : 1024
maxThreadsDim[0]         : 1024
maxThreadsDim[1]         : 1024
maxThreadsDim[2]         : 64
maxGridSize[0]           : 2147483647
maxGridSize[1]           : 65535
maxGridSize[2]           : 65535
clockRate                : 1468000
totalConstMem            : 65536
major                    : 6
minor                    : 1
textureAlignment         : 512
deviceOverlap            : 1
multiProcessorCount      : 3
kernelExecTimeoutEnabled : 1
integrated               : 0
canMapHostMemory         : 1
computeMode              : 0




# メインコード

In [6]:
import binarybrain as bb
import numpy as np
import os
import sys
from tqdm import tqdm
from collections import OrderedDict

In [7]:
# Load the MNIST data set; other cells read td['x_shape'] and td['x_train'] from it.
td = bb.load_mnist()

## 元レイヤー

In [None]:
# Modulation size handed to both conversion layers.
modulation_size = 16
layer_rel2bin = bb.RealToBinary.create(modulation_size, framewise=True)
layer_bin2rel = bb.BinaryToReal.create(modulation_size)


def _make_dense_stage(output_size):
    """Create one (DenseAffine, BatchNormalization, Binarize) triple, in that order."""
    affine = bb.DenseAffine.create([output_size])
    norm = bb.BatchNormalization.create()
    act = bb.Binarize.create()
    return affine, norm, act


# create layer
layer0_affine, layer0_norm, layer0_act = _make_dense_stage(32)
layer1_affine, layer1_norm, layer1_act = _make_dense_stage(32)
layer2_affine, layer2_norm, layer2_act = _make_dense_stage(64)
layer3_affine, layer3_norm, layer3_act = _make_dense_stage(64)
layer4_affine, layer4_norm, layer4_act = _make_dense_stage(512)
layer5_affine, layer5_norm, layer5_act = _make_dense_stage(10)

def _wrap_convolution(affine, norm, act):
    """Chain the three layers in a Sequential and wrap it in a LoweringConvolution created with (3, 3).

    Returns the (Sequential, LoweringConvolution) pair.
    """
    sub = bb.Sequential.create()
    for member in (affine, norm, act):
        sub.add(member)
    return sub, bb.LoweringConvolution.create(sub, 3, 3)


# main network
cnv0_sub, layer0_cnv = _wrap_convolution(layer0_affine, layer0_norm, layer0_act)
cnv1_sub, layer1_cnv = _wrap_convolution(layer1_affine, layer1_norm, layer1_act)
cnv2_sub, layer2_cnv = _wrap_convolution(layer2_affine, layer2_norm, layer2_act)
cnv3_sub, layer3_cnv = _wrap_convolution(layer3_affine, layer3_norm, layer3_act)

main_net = bb.Sequential.create()

# Layers are appended in exactly this order: input conversion, two conv pairs
# each followed by MaxPooling created with (2, 2), two dense stages, output conversion.
for _layer in (
    layer_rel2bin,
    layer0_cnv, layer1_cnv, bb.MaxPooling.create(2, 2),
    layer2_cnv, layer3_cnv, bb.MaxPooling.create(2, 2),
    layer4_affine, layer4_norm, layer4_act,
    layer5_affine, layer5_norm, layer5_act,
    layer_bin2rel,
):
    main_net.add(_layer)

main_net.set_input_shape(td['x_shape'])

# Load
net_path = 'mnist-dense-cnn'
os.makedirs(net_path, exist_ok=True)

if True:
    # Restore all affine layers first, then all batch-normalization layers,
    # each from its own JSON file under net_path.
    for _layer, _file_name in (
        (layer0_affine, 'layer0_affine.json'),
        (layer1_affine, 'layer1_affine.json'),
        (layer2_affine, 'layer2_affine.json'),
        (layer3_affine, 'layer3_affine.json'),
        (layer4_affine, 'layer4_affine.json'),
        (layer5_affine, 'layer5_affine.json'),
        (layer0_norm, 'layer0_norm.json'),
        (layer1_norm, 'layer1_norm.json'),
        (layer2_norm, 'layer2_norm.json'),
        (layer3_norm, 'layer3_norm.json'),
        (layer4_norm, 'layer4_norm.json'),
        (layer5_norm, 'layer5_norm.json'),
    ):
        _layer.load_json(os.path.join(net_path, _file_name))

# Send 'parameter_lock true' to the whole network, then 'parameter_lock false'
# to layer0_cnv only (presumably leaving just the first convolution trainable).
main_net.send_command('parameter_lock true')
layer0_cnv.send_command('parameter_lock false')

loss = bb.LossSoftmaxCrossEntropy.create()
metrics = bb.MetricsCategoricalAccuracy.create()
optimizer = bb.OptimizerAdam.create()
optimizer.set_variables(main_net.get_parameters(), main_net.get_gradients())

runner = bb.Runner(main_net, "mnist-dense-cnn-binary", loss, metrics, optimizer)
runner.fitting(
    td,
    epoch_size=3,
    mini_batch_size=32,
    file_write=False,
    file_read=False,
    init_eval=True,
)

  0%|                                                                  | 0/1875 [00:00<?, ?it/s, loss=1.46, accuracy=1]

[initial] accuracy=0.988700 loss=1.477333


  0%|                                                                                         | 0/1875 [00:00<?, ?it/s]

epoch=1 test_accuracy=0.865900 test_loss=1.607697 train_accuracy=0.859917 train_loss=1.611926


  0%|                                                                                         | 0/1875 [00:00<?, ?it/s]

epoch=2 test_accuracy=0.987200 test_loss=1.483252 train_accuracy=0.988033 train_loss=1.482732


 83%|██████████████████████████████████████████         | 1547/1875 [09:34<02:02,  2.67it/s, loss=1.48, accuracy=0.984]

In [14]:
if True:
    # Persist all affine layers first, then all batch-normalization layers,
    # each to its own JSON file under net_path.
    for _layer, _file_name in (
        (layer0_affine, 'layer0_affine.json'),
        (layer1_affine, 'layer1_affine.json'),
        (layer2_affine, 'layer2_affine.json'),
        (layer3_affine, 'layer3_affine.json'),
        (layer4_affine, 'layer4_affine.json'),
        (layer5_affine, 'layer5_affine.json'),
        (layer0_norm, 'layer0_norm.json'),
        (layer1_norm, 'layer1_norm.json'),
        (layer2_norm, 'layer2_norm.json'),
        (layer3_norm, 'layer3_norm.json'),
        (layer4_norm, 'layer4_norm.json'),
        (layer5_norm, 'layer5_norm.json'),
    ):
        _layer.save_json(os.path.join(net_path, _file_name))

In [None]:
# Send 'parameter_lock true' to every layer of main_net before wrapping it.
main_net.send_command('parameter_lock true')

# Rebind the conversion layers with a smaller modulation size (4 instead of 16).
modulation_size = 4
layer_rel2bin = bb.RealToBinary.create(modulation_size, framewise=True)
layer_bin2rel = bb.BinaryToReal.create(modulation_size)

# Wrap main_net between the new conversion layers.
# NOTE(review): the main_net assembled in the training cell already contains its own
# RealToBinary/BinaryToReal pair, while the main_net rebuilt in the distillation-target
# cell does not. This wrapper presumably expects the latter — confirm the intended cell order.
net = bb.Sequential.create()
net.add(layer_rel2bin)
net.add(main_net)
net.add(layer_bin2rel)
net.set_input_shape(td['x_shape'])

loss      = bb.LossSoftmaxCrossEntropy.create()
metrics   = bb.MetricsCategoricalAccuracy.create()
optimizer = bb.OptimizerAdam.create()
optimizer.set_variables(net.get_parameters(), net.get_gradients())

# NOTE(review): main_net received 'parameter_lock true' above, so this fitting run
# presumably only measures accuracy at the new modulation size — confirm.
runner = bb.Runner(net, "mnist-dense-cnn-binary", loss, metrics, optimizer)
runner.fitting(td, epoch_size=2, mini_batch_size=32, file_write=False, file_read=False)

## 蒸留先

In [None]:
def _lut6(node_count):
    """Create a SparseLut6 layer with shape [node_count]."""
    return bb.SparseLut6.create([node_count])


# Two LUT stages per convolution; the first stage is six times wider than the second.
layer_cnv0_sl0, layer_cnv0_sl1 = _lut6(192), _lut6(32)
layer_cnv1_sl0, layer_cnv1_sl1 = _lut6(192), _lut6(32)
layer_cnv2_sl0, layer_cnv2_sl1 = _lut6(384), _lut6(64)
layer_cnv3_sl0, layer_cnv3_sl1 = _lut6(384), _lut6(64)

# LUT stacks that stand in for the two dense stages.
layer_sl4_0 = _lut6(512*6)
layer_sl4_1 = _lut6(512)
layer_sl5_0 = _lut6(10*6*6)
layer_sl5_1 = _lut6(10*6)
layer_sl5_2 = _lut6(10)

# main network
# Each target convolution chains two SparseLut6 stages in a Sequential and wraps
# it in a LoweringConvolution created with (3, 3), like the reference layerN_cnv.
target_cnv0_sub = bb.Sequential.create()
target_cnv0_sub.add(layer_cnv0_sl0)
target_cnv0_sub.add(layer_cnv0_sl1)
target_layer0_cnv = bb.LoweringConvolution.create(target_cnv0_sub, 3, 3)

target_cnv1_sub = bb.Sequential.create()
target_cnv1_sub.add(layer_cnv1_sl0)
target_cnv1_sub.add(layer_cnv1_sl1)
target_layer1_cnv = bb.LoweringConvolution.create(target_cnv1_sub, 3, 3)

target_cnv2_sub = bb.Sequential.create()
target_cnv2_sub.add(layer_cnv2_sl0)
target_cnv2_sub.add(layer_cnv2_sl1)
target_layer2_cnv = bb.LoweringConvolution.create(target_cnv2_sub, 3, 3)

target_cnv3_sub = bb.Sequential.create()
target_cnv3_sub.add(layer_cnv3_sl0)
target_cnv3_sub.add(layer_cnv3_sl1)
target_layer3_cnv = bb.LoweringConvolution.create(target_cnv3_sub, 3, 3)

# LUT stacks that replace the two dense stages of the reference network.
target_layer4 = bb.Sequential.create()
target_layer4.add(layer_sl4_0)
target_layer4.add(layer_sl4_1)

target_layer5 = bb.Sequential.create()
target_layer5.add(layer_sl5_0)
target_layer5.add(layer_sl5_1)
target_layer5.add(layer_sl5_2)

# Assemble the target network with the same topology as the reference network:
# conv0, conv1, pool, conv2, conv3, pool, then the two LUT stacks.
target_net = bb.Sequential.create()
target_net.add(target_layer0_cnv)
target_net.add(target_layer1_cnv)
target_net.add(bb.MaxPooling.create(2, 2))
# The second convolution pair is layers 2 and 3. Re-adding layers 0 and 1 here
# would leave target_layer2_cnv/target_layer3_cnv unused and put the same
# layer objects into the network twice.
target_net.add(target_layer2_cnv)
target_net.add(target_layer3_cnv)
target_net.add(bb.MaxPooling.create(2, 2))
target_net.add(target_layer4)
target_net.add(target_layer5)

target_net.set_input_shape(td['x_shape'])

# Rebuild the reference network from the existing layers, this time without
# the RealToBinary/BinaryToReal conversion layers.
main_net = bb.Sequential.create()
main_net.add(layer0_cnv)
main_net.add(layer1_cnv)
main_net.add(bb.MaxPooling.create(2, 2))
main_net.add(layer2_cnv)
main_net.add(layer3_cnv)
main_net.add(bb.MaxPooling.create(2, 2))
main_net.add(layer4_affine)
main_net.add(layer4_norm)
# The Binarize layers are named layer4_act/layer5_act where they are created;
# no layer4_bin/layer5_bin exists, so those names raise NameError.
main_net.add(layer4_act)
main_net.add(layer5_affine)
main_net.add(layer5_norm)
main_net.add(layer5_act)

main_net.set_input_shape(td['x_shape'])

In [None]:
# Distillation of a single layer
def layer_distillation(x, x_shape, target_net, ref_net, pre_net, epoch=8):
    """Train ``target_net`` to reproduce the output of ``ref_net``.

    Every mini-batch of ``x`` is run forward through ``pre_net``. The result is
    fed to ``ref_net`` to obtain the teacher output and to ``target_net`` to
    obtain the student output. The mean squared error between the two is
    back-propagated through ``target_net`` only, and an Adam optimizer bound to
    ``target_net``'s parameters is updated after each mini-batch.

    Args:
        x: Sequence of input samples; it is sliced into mini-batches of at most 32.
        x_shape: Shape used for the input frame buffer and for ``pre_net``'s input.
        target_net: Network being trained. Its input shape is set from
            ``ref_net`` and it is sent the ``"binary true"`` command.
        ref_net: Network providing the teacher output. Its input shape must
            already be set, because it is read here.
        pre_net: Network applied to the raw input before both ``ref_net`` and
            ``target_net``.
        epoch: Number of passes over ``x``.

    Returns:
        None. ``target_net`` is updated in place.
    """
    # Dedicated input buffer. It is kept separate from pre_net's output so the
    # next iteration's resize/set_data never touches a buffer returned by a network.
    in_buf = bb.FrameBuffer()

    print(ref_net.get_input_shape())

    target_net.set_input_shape(ref_net.get_input_shape())
    target_net.send_command("binary true")

    pre_net.set_input_shape(x_shape)

    batch_size = len(x)
    max_batch_size = 32

    loss = bb.LossMeanSquaredError.create()
    optimizer = bb.OptimizerAdam.create()
    optimizer.set_variables(target_net.get_parameters(), target_net.get_gradients())

    for _ in range(epoch):
        # The displayed loss is accumulated per epoch.
        loss.clear()
        with tqdm(range(0, batch_size, max_batch_size), leave=True) as pbar:
            for index in pbar:
                # The last mini-batch may be shorter than max_batch_size.
                mini_batch_size = min(max_batch_size, batch_size - index)

                # setup x
                in_buf.resize(mini_batch_size, x_shape)
                in_buf.set_data(x[index:index + mini_batch_size])

                # forward (second argument is presumably the training flag:
                # only target_net runs with True)
                x_buf = pre_net.forward(in_buf, False)
                t_buf = ref_net.forward(x_buf, False)
                y_buf = target_net.forward(x_buf, True)

                # calc loss
                dy_buf = loss.calculate_loss(y_buf, t_buf, mini_batch_size)

                # backward
                target_net.backward(dy_buf)

                # update
                optimizer.update()

                # print progress (no local named `dict`, so the builtin stays usable)
                pbar.set_postfix(OrderedDict(loss=loss.get_loss()))

In [None]:
# Pre-network: applied to the raw input before the layer being distilled
# (it is passed as pre_net to layer_distillation).
pre_net = bb.Sequential.create()
pre_net.add(bb.RealToBinary.create(8, framewise=True))

In [None]:
# Layer0
# Fetch layer0_affine's weights, reshape them to the reversed tensor shape and
# sort each row's column indices by descending absolute weight.
tW = layer0_affine.W()
W = np.array(tW.get_data()).reshape(tW.get_shape()[::-1])
idx = np.argsort(-np.abs(W), axis=1)

# Connect in weight order
if True:
    for i in range(32):
        for j in range(6):
            layer_cnv0_sl1.set_connection_index([i], j, i*6+j)

    # NOTE(review): layer_cnv0_sl0 is created with [192] nodes, but only node
    # indices 0..31 are configured here, with 36 slots each — confirm this
    # against set_connection_index's contract.
    for i in range(32):
        for j in range(6*6):
            # Cycle through all 3*3 ranked inputs. Without the parentheses,
            # `j % 3*3` parses as `(j % 3) * 3` and only picks ranks 0, 3 and 6.
            layer_cnv0_sl0.set_connection_index([i], j, idx[i][j % (3*3)])


# NOTE(review): these files are written by the save cell for layer0's LUTs,
# so on a fresh checkout they may not exist yet — verify load_json's behavior.
layer_cnv0_sl0.load_json(os.path.join(net_path, 'layer_cnv0_sl0.json'))
layer_cnv0_sl1.load_json(os.path.join(net_path, 'layer_cnv0_sl1.json'))

layer_distillation(td['x_train'], td['x_shape'], target_layer0_cnv, layer0_cnv, pre_net, epoch=1)

In [None]:
# Persist both LUT stages of the distilled layer-0 convolution under net_path.
for _lut_layer, _file_name in (
    (layer_cnv0_sl0, 'layer_cnv0_sl0.json'),
    (layer_cnv0_sl1, 'layer_cnv0_sl1.json'),
):
    _lut_layer.save_json(os.path.join(net_path, _file_name))



In [None]:
# Layer1
# Fetch layer1_affine's weights, reshape them to the reversed tensor shape and
# sort each row's column indices by descending absolute weight.
tW = layer1_affine.W()
_reversed_shape = tW.get_shape()[::-1]
W = np.array(tW.get_data()).reshape(_reversed_shape)
idx = (-np.abs(W)).argsort(axis=1)

In [None]:
if True:
    # Second LUT stage: for each of the 32 nodes, slot j is given index i*6+j,
    # i.e. the running counter k below.
    for k in range(32 * 6):
        i, j = divmod(k, 6)
        layer_cnv1_sl1.set_connection_index([i], j, k)

    # First LUT stage: slot j of node i is given the j-th entry of node i's
    # weight ranking in idx.
    for k in range(32 * 6 * 6):
        i, j = divmod(k, 6 * 6)
        layer_cnv1_sl0.set_connection_index([i], j, idx[i][j])

In [None]:
# Disabled experiment (never executed because of `if False`): a four-stage
# connection scheme for layer 1.
# NOTE(review): layer_cnv1_sl2 and layer_cnv1_sl3 are not created anywhere in
# the visible notebook, so enabling this block as-is would presumably raise NameError.
if False:
    for i in range(32):
        for j in range(6):
            layer_cnv1_sl3.set_connection_index([i], j, i*6+j)
    
    for i in range(32):
        for j in range(6*6):
            layer_cnv1_sl2.set_connection_index([i], j, i*6*6+j)
            
    for i in range(32):
        for j in range(6*6):
            layer_cnv1_sl1.set_connection_index([i], j, i*6*6+j)

    for i in range(32):
        for j in range(6*6*6):
            # Wrap around the ranking when there are more slots than ranked inputs.
#            if j < len(idx[i]):
            layer_cnv1_sl0.set_connection_index([i], j, idx[i][j % len(idx[i])])

In [None]:
# Re-fetch layer1_affine's weights and reshape them to the reversed tensor shape
# (same computation as the cell marked "# Layer1").
tW = layer1_affine.W()
W = np.array(tW.get_data()).reshape(tW.get_shape()[::-1])

In [None]:
# Per row, column indices ordered by descending absolute weight.
idx = np.argsort(-np.abs(W), axis=1)

In [None]:
# Rebuild the pre-network for layer-1 distillation, this time with RealToBinary
# created with 4 (the layer-0 pre-network used 8).
pre_net = bb.Sequential.create()
pre_net.add(bb.RealToBinary.create(4, framewise=True))

In [None]:
# Append the reference layer-0 convolution so layer 1 receives layer 0's output.
# NOTE(review): re-running this cell appends layer0_cnv again — re-create pre_net first.
pre_net.add(layer0_cnv)

In [None]:
# Distill layer1_cnv into target_layer1_cnv with the default epoch count (8).
layer_distillation(td['x_train'], td['x_shape'], target_layer1_cnv, layer1_cnv, pre_net)

In [None]:
# Persist both LUT stages of the distilled layer-1 convolution under net_path.
# NOTE(review): these file names use 'layer1_cnv_slN.json', whereas layer 0 is
# saved as 'layer_cnv0_slN.json' — confirm which naming scheme loaders expect.
for _lut_layer, _file_name in (
    (layer_cnv1_sl0, 'layer1_cnv_sl0.json'),
    (layer_cnv1_sl1, 'layer1_cnv_sl1.json'),
):
    _lut_layer.save_json(os.path.join(net_path, _file_name))