In [1]:
# Switch checked by the install and Drive-mount cells below; set to True when
# running on Google Colab.
google_colab = False

# Google Colab用事前準備

In [2]:
# install
try:
    import binarybrain as bb
except:
    if google_colab and False:
        !pip install pybind11
        %cd /content
        !nvcc -V
        !sudo rm -fr BinaryBrain
        !rm -fr BinaryBrain
        !git clone --recursive -b ver3_develop https://github.com/ryuz/BinaryBrain.git
        %cd /content/BinaryBrain/python
        !sh copy_src.sh
        !python3 setup.py build
        !python3 setup.py develop

        import binarybrain as bb

In [3]:
# mount
# On Colab only: mount Google Drive and change into the working directory on it.
if google_colab:
  from google.colab import drive 
  drive.mount('/content/drive')
  %cd /content/drive/My Drive/git-work/BinaryBrain_ver3_develop/tests/python

In [4]:
# Index of the device handed to binarybrain below.
device = 0

import binarybrain as bb

# Reload the module so a re-run of this cell picks up a rebuilt package.
import importlib
importlib.reload(bb)

# Select the device and print its reported properties.
bb.set_device(device)
prop = bb.get_device_properties(device)
print(prop)


name                     : GeForce GTX 1660 SUPER
totalGlobalMem           : 6442450944
sharedMemPerBlock        : 49152
regsPerBlock             : 65536
warpSize                 : 32
memPitch                 : 2147483647
maxThreadsPerBlock       : 1024
maxThreadsDim[0]         : 1024
maxThreadsDim[1]         : 1024
maxThreadsDim[2]         : 64
maxGridSize[0]           : 2147483647
maxGridSize[1]           : 65535
maxGridSize[2]           : 65535
clockRate                : 1800000
totalConstMem            : 65536
major                    : 7
minor                    : 5
textureAlignment         : 512
deviceOverlap            : 1
multiProcessorCount      : 22
kernelExecTimeoutEnabled : 1
integrated               : 0
canMapHostMemory         : 1
computeMode              : 0




# メインコード

In [5]:
import binarybrain as bb
import numpy as np
import matplotlib.pyplot as plt
import random
import cv2
import os
import sys
import matplotlib.pyplot as plt
from tqdm import tqdm
from collections import OrderedDict

In [6]:
def make_test_image(src_x, src_t):
    """Build per-pixel target planes for flattened 28x28 images.

    For each sample, pixels brighter than 0.5 receive the sample's first ten
    label values in channels 0-9, and pixels at or below 0.5 receive 0.15 in
    channel 10. Every other target value is 0.

    Args:
        src_x: sequence of flattened images (28*28 values each).
        src_t: sequence of per-sample label vectors; entries 0-9 are used.

    Returns:
        Tuple (images, targets) of float32 arrays shaped (N, 1, 28, 28) and
        (N, 11, 28, 28).
    """
    count = len(src_x)
    images = np.empty((count, 1, 28, 28), dtype=np.float32)
    targets = np.empty((count, 11, 28, 28), dtype=np.float32)

    for n in range(count):
        pixels = src_x[n]
        foreground = pixels > 0.5
        background = pixels <= 0.5

        planes = np.zeros((11, 28 * 28), dtype=np.float32)
        # Broadcast the ten class values across every foreground pixel at once.
        planes[:10, foreground] = np.asarray(src_t[n][:10]).reshape(10, 1)
        planes[10, background] = 0.15

        images[n] = pixels.reshape(1, 28, 28)
        targets[n] = planes.reshape(11, 28, 28)

    return images, targets

In [7]:
def make_test_data(src_x, src_t):
    """Tile groups of four 28x28 samples into flattened 56x56 mosaics.

    Samples are first converted by make_test_image(). Within each consecutive
    group of four, samples 0 and 1 are joined along the last axis, samples 2
    and 3 likewise, and the two strips are then joined along axis 1. Samples
    left over after the last full group of four are dropped.

    Returns:
        Tuple (x, t) of float32 arrays shaped (N // 4, 1*56*56) and
        (N // 4, 11*56*56).
    """
    x, t = make_test_image(src_x, src_t)

    def mosaic(tiles):
        # Two tiles side by side (axis 2), then the two strips stacked (axis 1).
        upper = np.concatenate((tiles[0], tiles[1]), axis=2)
        lower = np.concatenate((tiles[2], tiles[3]), axis=2)
        return np.concatenate((upper, lower), axis=1)

    groups = len(src_x) // 4
    out_x = np.empty((groups,  1*56*56), dtype=np.float32)
    out_t = np.empty((groups, 11*56*56), dtype=np.float32)
    for g in range(groups):
        first = 4 * g
        out_x[g] = mosaic(x[first:first + 4]).reshape(-1)
        out_t[g] = mosaic(t[first:first + 4]).reshape(-1)
    return out_x, out_t

In [8]:
def image_transform(img):
    """Return a randomly rotated, scaled and shifted copy of one 28x28 image.

    The rotation angle is drawn from [-10, 10], the scale from [0.8, 1.1] and
    each translation component from [-2, 2]; rotation is about point (14, 14).

    Args:
        img: array that can be reshaped to (28, 28).

    Returns:
        The warped 28x28 image produced by cv2.warpAffine.
    """
    rotation = random.uniform(-10.0, +10.0)
    zoom = random.uniform(0.8, 1.1)
    affine = cv2.getRotationMatrix2D((14, 14), rotation, zoom)

    # Jitter the translation column of the 2x3 affine matrix.
    affine[0, 2] += random.uniform(-2.0, 2.0)
    affine[1, 2] += random.uniform(-2.0, 2.0)

    plane = img.reshape(28, 28)
    return cv2.warpAffine(plane, affine, (28, 28))

def make_td_image(src_x, src_t, w=28, h=28, depth=27):
    """Compose one random sample from a canvas of transformed 28x28 tiles.

    A canvas of 28x28 tiles is filled with randomly chosen samples, each passed
    through image_transform(). A window of (2*depth + h) x (2*depth + w) pixels
    at a random offset of 0-27 pixels becomes the input; the central h x w
    region of that window becomes the target.

    Target channels 0-9 carry the tile's label values where the canvas pixel is
    above 0.5, and channel 10 is 0.15 where the pixel is at or below 0.5.
    With probability 1/2 the returned input is inverted (1.0 - x); both masks
    are taken from the canvas before that inversion.

    Returns:
        Tuple (x, t, x_mask, t_mask) of flattened arrays; the masks are float32
        with 1.0 where the canvas pixel is above 0.5.
    """
    sample_count = len(src_x)
    win_w = 2 * depth + w
    win_h = 2 * depth + h
    # Enough tiles to cover the window plus the largest random offset.
    cols = (28 + win_w + 27) // 28
    rows = (28 + win_h + 27) // 28

    canvas_x = np.zeros(( 1, 28 * rows, 28 * cols), dtype=np.float32)
    canvas_t = np.zeros((11, 28 * rows, 28 * cols), dtype=np.float32)
    for row in range(rows):
        tile_rows = slice(row * 28, row * 28 + 28)
        for col in range(cols):
            tile_cols = slice(col * 28, col * 28 + 28)
            pick = random.randrange(0, sample_count)
            canvas_x[0, tile_rows, tile_cols] = image_transform(src_x[pick])
            # Spread the ten label values over every pixel of the tile.
            canvas_t[0:10, tile_rows, tile_cols] = np.reshape(src_t[pick], (10, 1, 1))

    dark = canvas_x[0] <= 0.5
    bright = canvas_x[0] > 0.5
    canvas_t[:10, dark] = 0
    canvas_t[10, dark] = 0.15

    # Random window offset; x is drawn before y.
    xs = random.randrange(0, 28)
    ys = random.randrange(0, 28)
    outer_rows = slice(ys, ys + win_h)
    outer_cols = slice(xs, xs + win_w)
    inner_rows = slice(ys + depth, ys + depth + h)
    inner_cols = slice(xs + depth, xs + depth + w)

    dst_x = canvas_x[:, outer_rows, outer_cols]
    dst_t = canvas_t[:, inner_rows, inner_cols]
    x_mask = bright[outer_rows, outer_cols].astype(np.float32)
    t_mask = bright[inner_rows, inner_cols].astype(np.float32)

    if random.randint(0, 1) > 0:
        dst_x = 1.0 - dst_x

    return dst_x.flatten(), dst_t.flatten(), x_mask.flatten(), t_mask.flatten()

def generate_td(src_td, train_size=60000//4, test_size=10000//4, w=28, h=28, depth=27):
    """Generate a train/test data set from make_td_image() samples.

    Args:
        src_td: dict providing 'x_train', 't_train', 'x_test' and 't_test'.
        train_size: number of training samples to generate.
        test_size: number of test samples to generate.
        w, h, depth: forwarded to make_td_image().

    Returns:
        Dict with stacked arrays under 'x_*', 't_*', 'x_mask_*' and 't_mask_*'
        for both splits, plus 'x_shape' and 't_shape'.
    """
    # (sample count, input key, target key, input-mask key, target-mask key)
    splits = (
        (train_size, 'x_train', 't_train', 'x_mask_train', 't_mask_train'),
        (test_size,  'x_test',  't_test',  'x_mask_test',  't_mask_test'),
    )

    dst_td = {}
    for count, x_key, t_key, xm_key, tm_key in splits:
        samples = [
            make_td_image(src_td[x_key], src_td[t_key], w=w, h=h, depth=depth)
            for _ in tqdm(range(count))
        ]
        dst_td[x_key]  = np.array([sample[0] for sample in samples])
        dst_td[t_key]  = np.array([sample[1] for sample in samples])
        dst_td[xm_key] = np.array([sample[2] for sample in samples])
        dst_td[tm_key] = np.array([sample[3] for sample in samples])

    dst_td['x_shape'] = [depth*2+w, depth*2+h, 1]
    dst_td['t_shape'] = [w, h, 11]

    return dst_td

In [9]:
# load MNIST data
# `td` is the data set used by set_input_shape() and runner.fitting() below.
td = bb.load_mnist()

# ネットワーク構築

In [10]:
# Directory that save_model_list()/load_model_list() read and write under.
data_path = 'MnistMobileNetDistillation'
os.makedirs(data_path, exist_ok=True)
# Name passed to bb.Runner.
network_name = 'mnist-mobilenet-distillation'

# Module-level registries filled by make_cnv_layer() and emptied by clear_list().
ref_affine_list = []
ref_norm_list   = []
target_lut_list = []

def clear_list():
    """Empty the three module-level model registries in place."""
    for registry in (ref_affine_list, ref_norm_list, target_lut_list):
        registry.clear()

def save_model_list(model_list, name, path='.'):
    """Write each model to '<data_path>/<path>/<name>_<index>.json'.

    The destination directory is created if it does not exist.
    """
    os.makedirs(os.path.join(data_path, path), exist_ok=True)
    for index, model in enumerate(model_list):
        json_name = '%s_%d.json' % (name, index)
        model.save_json(os.path.join(data_path, path, json_name))

def load_model_list(model_list, name, path='.'):
    """Load each model from '<data_path>/<path>/<name>_<index>.json'.

    Returns:
        False as soon as a file is missing (models before it stay loaded),
        True when every model was loaded.
    """
    for index, model in enumerate(model_list):
        json_path = os.path.join(data_path, path, '%s_%d.json' % (name, index))
        if os.path.exists(json_path):
            model.load_json(json_path)
        else:
            return False
    return True

def save_all_model(path='.'):
    """Save every registered affine, normalization and LUT model under `path`."""
    registries = (
        (ref_affine_list, 'ref_affine'),
        (ref_norm_list,   'ref_norm_list'),
        (target_lut_list, 'target_lut'),
    )
    for models, prefix in registries:
        save_model_list(models, prefix, path)
    
def load_all_model(path='.'):
    """Load every registered model from `path`.

    Returns:
        True when all three registries loaded completely; False as soon as one
        of them reports a missing file (later registries are not attempted).
    """
    registries = (
        (ref_affine_list, 'ref_affine'),
        (ref_norm_list,   'ref_norm_list'),
        (target_lut_list, 'target_lut'),
    )
    # all() stops at the first failing registry, matching a chain of early returns.
    return all(load_model_list(models, prefix, path) for models, prefix in registries)

def make_common_layer(model):
    """Wrap a model in a layer entry whose only selectable variant is 'common'."""
    return {
        'type':   'common',
        'select': 'common',
        'common': model,
    }

def make_cnv_layer(ch_size, w=3, h=3, lut_size=2, bn=True, connection='random', padding='valid'):
    """Create a convolution layer entry holding a reference and a LUT variant.

    Args:
        ch_size: size passed to the affine layer and base size of the LUT stages.
        w, h: filter size handed to LoweringConvolutionBit.
        lut_size: number of SparseLut6Bit stages; stage i has ch_size * 6**i nodes.
        bn: second argument of SparseLut6Bit.create (presumably a batch-norm
            switch - confirm against the library).
        connection: 'depthwise' selects DepthwiseDenseAffine for the reference;
            any other value selects DenseAffine.
        padding: forwarded to LoweringConvolutionBit.create.

    Returns:
        Dict with the sub-layers, the 'ref' and 'target' convolutions, and
        'select' initialised to 'ref'.

    Side effects:
        Appends the new affine, normalization and LUT models to the
        module-level registries.
    """
    # setup information
    layer = {
        'type':            'convolution',
        'select':          'ref',
        'connection':      connection,
        'target_lut_size': lut_size,
    }

    affine_factory = bb.DepthwiseDenseAffine if connection == 'depthwise' else bb.DenseAffine
    layer['ref_affine'] = affine_factory.create([ch_size])
    layer['ref_norm']   = bb.BatchNormalization.create() # momentum=0.1)
    layer['ref_act']    = bb.BinarizeBit.create()

    layer['lut_size'] = lut_size
    lut_keys = ['target_lut%d' % stage for stage in range(lut_size)]
    for stage, key in enumerate(lut_keys):
        layer[key] = bb.SparseLut6Bit.create([ch_size * (6 ** stage)], bn)

    # Network order runs from the highest stage index down to stage 0.
    luts_in_net_order = [layer[key] for key in reversed(lut_keys)]

    # save
    ref_affine_list.append(layer['ref_affine'])
    ref_norm_list.append(layer['ref_norm'])
    target_lut_list.extend(luts_in_net_order)

    # make network
    ref_subnet = bb.Sequential.create()
    for part in (layer['ref_affine'], layer['ref_norm'], layer['ref_act']):
        ref_subnet.add(part)
    layer['ref'] = bb.LoweringConvolutionBit.create(ref_subnet, w, h, 1, 1, padding=padding)

    target_subnet = bb.Sequential.create()
    for lut in luts_in_net_order:
        target_subnet.add(lut)
    layer['target'] = bb.LoweringConvolutionBit.create(target_subnet, w, h, 1, 1, padding=padding)

    return layer

def build_net(layer_list):
    """Chain the currently selected variant of every layer entry into one net."""
    net = bb.Sequential.create()
    for entry in layer_list:
        active_key = entry['select']
        net.add(entry[active_key])
    return net

In [11]:
# build network
# The trailing "NxN" comments give the spatial size after each stage for a
# 28x28 input. Entries are appended in network order; later cells address
# them by position (e.g. layer_list[17]).
modulation_size = 3
layer_rel2bin = bb.RealToBinaryBit.create(modulation_size, framewise=True)
layer_bin2rel = bb.BinaryToRealBit.create(modulation_size)

# Start from empty registries so re-running this cell does not accumulate models.
clear_list()
layer_list = []
layer_list.append(make_common_layer(layer_rel2bin))                   # [0]
layer_list.append(make_cnv_layer(36, 3, 3))                           # 26x26

# [2]-[4]: 1x1 -> depthwise 3x3 -> 1x1
layer_list.append(make_cnv_layer(36, 1, 1))
layer_list.append(make_cnv_layer(36, 3, 3, lut_size=1, connection='depthwise'))  # 24x24
layer_list.append(make_cnv_layer(36, 1, 1))

layer_list.append(make_common_layer(bb.MaxPoolingBit.create(2, 2)))   # 12x12

# [6]-[8]
layer_list.append(make_cnv_layer(36*2, 1, 1))
layer_list.append(make_cnv_layer(36*2, 3, 3, lut_size=1, connection='depthwise'))  # 10x10
layer_list.append(make_cnv_layer(36, 1, 1))

# [9]-[11]
layer_list.append(make_cnv_layer(36*2, 1, 1))
layer_list.append(make_cnv_layer(36*2, 3, 3, lut_size=1, connection='depthwise'))  # 8x8
layer_list.append(make_cnv_layer(36, 1, 1))

layer_list.append(make_common_layer(bb.MaxPoolingBit.create(2, 2)))      # 4x4

# [13]-[15]: the depthwise stage here uses a 2x2 filter
layer_list.append(make_cnv_layer(36*3, 1, 1))
layer_list.append(make_cnv_layer(36*3, 2, 2, lut_size=1, connection='depthwise'))  # 3x3
layer_list.append(make_cnv_layer(36, 1, 1))

# [16]-[18]: [17] is the depthwise layer, [18] produces the 10 output channels
layer_list.append(make_cnv_layer(36*3, 1, 1))
layer_list.append(make_cnv_layer(36*3, 3, 3, lut_size=1, connection='depthwise'))  # 1x1
layer_list.append(make_cnv_layer(10, 1, 1))

layer_list.append(make_common_layer(layer_bin2rel))                   # [19]

In [12]:
# Assemble the network from the currently selected variant of every layer.
main_net = build_net(layer_list)
main_net.set_input_shape(td['x_shape'])
main_net.send_command('binary true')

# The 'target' variants are not part of main_net yet, so give each one the
# same input shape as its 'ref' counterpart.
for layer in layer_list:
    if layer['type'] != 'common':
        layer['target'].set_input_shape(layer['ref'].get_input_shape())

print(main_net.get_info())

----------------------------------------------------------------------
[Sequential] 
  --------------------------------------------------------------------
  [RealToBinary] 
   input  shape : {28, 28, 1}   output shape : {28, 28, 1}
  --------------------------------------------------------------------
  [LoweringConvolution] 
   filter size : (3, 3)
   input  shape : {28, 28, 1}   output shape : {26, 26, 36}
    ------------------------------------------------------------------
    [ConvolutionIm2Col] 
     input  shape : {28, 28, 1}     output shape : {3, 3, 1}
    ------------------------------------------------------------------
    [Sequential] 
      ----------------------------------------------------------------
      [DenseAffine] 
       input  shape : {3, 3, 1}       output shape : {36}
      ----------------------------------------------------------------
      [BatchNormalization] 
       input  shape : {36}       output shape : {36}
      ---------------------------------

In [13]:
# Restore the reference affine and normalization models from '<data_path>/base'.
# Only the second call's result is displayed; the first call's return value is
# discarded.
load_model_list(ref_affine_list, 'ref_affine',    'base')
load_model_list(ref_norm_list,   'ref_norm_list', 'base')

True

In [14]:
# train
# Loss, metric and optimizer for main_net; the optimizer is bound to the
# network's parameters and gradients before the runner is created.
loss      = bb.LossSoftmaxCrossEntropy.create()
metrics   = bb.MetricsCategoricalAccuracy.create()
optimizer = bb.OptimizerAdam.create()
optimizer.set_variables(main_net.get_parameters(), main_net.get_gradients())
runner = bb.Runner(main_net, network_name, loss, metrics, optimizer)

In [15]:
# Train for one epoch on `td`, with file_write and file_read both turned off.
runner.fitting(td, epoch_size=1, mini_batch_size=32, file_write=False, file_read=False)

                                                                                                                       

epoch=1 test_accuracy=0.969500 test_loss=1.491587 train_accuracy=0.969067 train_loss=1.492452


In [16]:
# Store the reference affine and normalization models under '<data_path>/base',
# the location the earlier load cell reads from.
save_model_list(ref_affine_list, 'ref_affine',    'base')
save_model_list(ref_norm_list,   'ref_norm_list', 'base')

In [17]:
# Disabled scratch cell: the `if False:` guard means this body never runs.
# `order` and `lut` are not assigned at module level anywhere in the visible
# notebook; the same loop is the body of lut_sort_depthwise(), so this cell is
# a candidate for removal.
if False:
    group_size  = order.shape[0]
    output_size = lut.get_output_node_size()
    input_size  = lut.get_input_node_size()
    input_unit  = input_size // group_size
    output_unit = output_size // group_size
    for g in range(group_size):
        for o in range(output_unit):
            node = g * output_unit + o
            for i in range(lut.get_node_connection_size(node)):
                idx = o*6 + i
                if idx < order.shape[1]:
                    lut.set_node_connection_index(node, i, order[g][idx])
        

In [16]:
def lut_sort_depthwise(lut, order):
    """Rewire a LUT layer's node connections from a per-group input ranking.

    Output nodes are split evenly into order.shape[0] groups. The o-th node of
    group g reads row order[g] starting at position o*6 and writes one entry
    per connection slot. Slots whose position falls past the end of the row
    are left untouched.

    Args:
        lut: object exposing get_output_node_size(),
            get_node_connection_size(node) and
            set_node_connection_index(node, slot, input_index).
        order: 2-D integer array with one row of input indices per group.
    """
    group_size, rank_len = order.shape[0], order.shape[1]
    output_unit = lut.get_output_node_size() // group_size

    # NOTE(review): values from `order` are written unchanged. The previous
    # version also computed get_input_node_size() // group_size without using
    # it; if `order` holds group-local indices, an offset of
    # g * (inputs per group) may have been intended - confirm against the
    # LUT's input layout before changing this.
    for g in range(group_size):
        ranking = order[g]
        for o in range(output_unit):
            node = g * output_unit + o
            for i in range(lut.get_node_connection_size(node)):
                idx = o * 6 + i
                if idx < rank_len:
                    lut.set_node_connection_index(node, i, ranking[idx])


def distillation_layer(layer):
    """Switch a convolution layer entry from its 'ref' to its 'target' variant.

    The reference affine weights are reshaped to the reversed tensor shape and
    ranked per row by descending absolute value. For 'depthwise' layers that
    ranking is applied to 'target_lut0' via lut_sort_depthwise().

    Returns:
        False (entry untouched) when the entry is not a convolution layer,
        True after 'select' has been set to 'target'.
    """
    is_conv = layer['type'] == 'convolution'
    if is_conv:
        # sort
        weight_tensor = layer['ref_affine'].W()
        reversed_shape = weight_tensor.get_shape()[::-1]
        W = np.array(weight_tensor.get_data()).reshape(reversed_shape)
        ranking = np.argsort(-np.abs(W), axis=1)

        if layer['connection'] == 'depthwise':
            lut_sort_depthwise(layer['target_lut0'], ranking)

        layer['select'] = 'target'
    return is_conv

In [21]:
# Switch entry 17 (the last depthwise convolution appended to layer_list) to
# its LUT 'target' variant.
distillation_layer(layer_list[17])

True

In [22]:
# Uncomment to switch entry 17 back to its reference variant before rebuilding:
#layer_list[17]['select'] = 'ref'

# Rebuild the network so it picks up the current 'select' of every entry.
main_net = build_net(layer_list)
main_net.set_input_shape(td['x_shape'])
main_net.send_command('binary true')

In [23]:
# Send 'parameter_lock true' to the whole network, then 'parameter_lock false'
# to entry 17's target only (presumably so that only that layer is trained -
# confirm against the library's command semantics).
main_net.send_command('parameter_lock true')
layer_list[17]['target'].send_command('parameter_lock false')

print(main_net.get_info())

----------------------------------------------------------------------
[Sequential] 
  --------------------------------------------------------------------
  [RealToBinary] 
   input  shape : {28, 28, 1}   output shape : {28, 28, 1}
  --------------------------------------------------------------------
  [LoweringConvolution] 
   filter size : (3, 3)
   input  shape : {28, 28, 1}   output shape : {26, 26, 36}
    ------------------------------------------------------------------
    [ConvolutionIm2Col] 
     input  shape : {28, 28, 1}     output shape : {3, 3, 1}
    ------------------------------------------------------------------
    [Sequential] 
      ----------------------------------------------------------------
      [DenseAffine] 
       input  shape : {3, 3, 1}       output shape : {36}
      ----------------------------------------------------------------
      [BatchNormalization] 
       input  shape : {36}       output shape : {36}
      ---------------------------------

In [26]:
# train
# Fresh loss/metric/optimizer bound to the rebuilt main_net, then two epochs
# of training with file_write and file_read both turned off.
loss      = bb.LossSoftmaxCrossEntropy.create()
metrics   = bb.MetricsCategoricalAccuracy.create()
optimizer = bb.OptimizerAdam.create()
optimizer.set_variables(main_net.get_parameters(), main_net.get_gradients())
runner = bb.Runner(main_net, network_name, loss, metrics, optimizer)
runner.fitting(td, epoch_size=2, mini_batch_size=32, file_write=False, file_read=False)

  0%|                                                                                         | 0/1875 [00:00<?, ?it/s]

epoch=1 test_accuracy=0.754900 test_loss=1.659289 train_accuracy=0.751183 train_loss=1.667326


                                                                                                                       

epoch=2 test_accuracy=0.795900 test_loss=1.645633 train_accuracy=0.785017 train_loss=1.651426


In [27]:
# Save every registered model under data_path (default sub-path '.').
save_all_model()

In [18]:
# Reload every registered model from data_path; evaluates to False if any file is missing.
load_all_model()

True

In [25]:
# Fresh loss/metric/optimizer bound to main_net.
loss      = bb.LossSoftmaxCrossEntropy.create()
metrics   = bb.MetricsCategoricalAccuracy.create()
optimizer = bb.OptimizerAdam.create()
optimizer.set_variables(main_net.get_parameters(), main_net.get_gradients())

# Send 'parameter_lock false' to the whole network before training again.
main_net.send_command('parameter_lock false')

runner = bb.Runner(main_net, network_name, loss, metrics, optimizer)
runner.fitting(td, epoch_size=2, mini_batch_size=32, file_write=False, file_read=False)

  0%|                                                              | 0/1875 [00:00<?, ?it/s, loss=1.58, accuracy=0.844]

epoch=1 test_accuracy=0.923200 test_loss=1.542615 train_accuracy=0.917150 train_loss=1.547754


                                                                                                                       

epoch=2 test_accuracy=0.921700 test_loss=1.545916 train_accuracy=0.916733 train_loss=1.550917


In [23]:
# Print the structure report of the current main_net.
print(main_net.get_info())

----------------------------------------------------------------------
[Sequential] 
  --------------------------------------------------------------------
  [RealToBinary] 
   input  shape : {28, 28, 1}   output shape : {28, 28, 1}
  --------------------------------------------------------------------
  [LoweringConvolution] 
   filter size : (3, 3)
   input  shape : {28, 28, 1}   output shape : {26, 26, 36}
    ------------------------------------------------------------------
    [ConvolutionIm2Col] 
     input  shape : {28, 28, 1}     output shape : {3, 3, 1}
    ------------------------------------------------------------------
    [Sequential] 
      ----------------------------------------------------------------
      [DenseAffine] 
       input  shape : {3, 3, 1}       output shape : {36}
      ----------------------------------------------------------------
      [BatchNormalization] 
       input  shape : {36}       output shape : {36}
      ---------------------------------