# Google Colab での使い方


## ランタイムでGPUを選択

まず、「ランタイム」→「ランタイムのタイプを変更」を開き、ハードウェアアクセラレーターで「GPU」を選びます。

In [None]:
# Detect whether this notebook is running on Google Colab:
# GoogleColab is True when google.colab can be imported, False otherwise.
try:
  from google.colab import drive
except ModuleNotFoundError:
  GoogleColab = False
else:
  GoogleColab = True

## 推奨の追加コマンド

In [None]:
# Import BinaryBrain; when it is not installed yet, install it with pip and
# switch to a working directory on Google Drive.
try:
  import binarybrain as bb
except ModuleNotFoundError:
  # Imported before the pip commands, so this branch stops right here when
  # google.colab is not available
  from google.colab import drive
  !pip install pybind11
  !pip install binarybrain
  import binarybrain as bb
  import os
  # Mount Google Drive and make the BinaryBrain folder the current directory
  drive.mount('/content/drive')
  work_directory = '/content/drive/My Drive/BinaryBrain'
  os.makedirs(work_directory, exist_ok=True)
  os.chdir(work_directory)
  # NOTE(review): the Drive mount only runs in this install branch, so it is
  # skipped whenever binarybrain is already importable — confirm that is intended

# 以降メンテナンス用コマンド

## TestPyPIからのインストール

In [None]:
# pybind11 is installed first (presumably a build requirement of binarybrain — TODO confirm)
!pip install pybind11
# Install binarybrain using the TestPyPI index instead of the default index
!pip install --index-url https://test.pypi.org/simple/ binarybrain

## PyPIからのインストール

In [None]:
!pip install pybind11
!pip install --index-url https://test.pypi.org/simple/ binarybrain

## pip でのアンインストール

In [None]:
# Remove the installed binarybrain package; -y answers the confirmation prompt
!pip uninstall -y binarybrain

## setup.py でのインストール

下記の実行でインストール可能<br>
インストール後に、一度「ランタイム」→「ランタイムの再起動」を選んで再起動が必要

In [None]:
try:
  import binarybrain
except ModuleNotFoundError:
  !pip install pybind11
  !git clone -b ver4_release  https://github.com/ryuz/BinaryBrain.git
  %cd BinaryBrain
  !python3 setup.py install --user
  sys.exit()  # please reboot runtime

developインストールの場合

In [None]:
try:
  import binarybrain
except ModuleNotFoundError:
  %cd /content
  !rm -fr BinaryBrain
  !pip install pybind11
  !git clone -b ver4_release  https://github.com/ryuz/BinaryBrain.git
  %cd BinaryBrain
  !python3 setup.py develop
  sys.exit()  # please reboot runtime

## GoogleDrive のマウント

In [None]:
# Mount Google Drive and switch the current directory to a BinaryBrain folder on it.
import os
from google.colab import drive
drive.mount('/content/drive')
# Create the folder on first use (exist_ok avoids an error on later runs)
work_directory = '/content/drive/My Drive/BinaryBrain'
os.makedirs(work_directory, exist_ok=True)
os.chdir(work_directory)

## バージョン確認

In [None]:
# Report the BinaryBrain version and the GPUs it can see.
import binarybrain as bb

# Version string
bb_version = bb.get_version_string()
print('BinaryBrain ver : %s' % (bb_version,))

# Number of available GPUs
device_count = bb.get_device_count()
print('GPU count : %d\n' % (device_count,))

# Properties of each GPU, one section per device index
for gpu_index in range(device_count):
  header = '[GPU<%d> Properties]' % gpu_index
  print(header)
  print(bb.get_device_properties_string(gpu_index))

## 簡単な学習確認

In [None]:
import os
import shutil
import numpy as np
from tqdm.notebook import tqdm

import torch
import torchvision
import torchvision.transforms as transforms

import binarybrain as bb

# configuration
net_name              = 'MnistDifferentiableLutSimple'
# Per-network directory; passed to bb.load_networks / bb.save_networks in this cell
data_path             = os.path.join('./data/', net_name)
# Verilog-related paths; not referenced by the rest of this cell
rtl_sim_path          = '../../verilog/mnist'
rtl_module_name       = 'MnistLutSimple'
output_velilog_file   = os.path.join(data_path, net_name + '.v')
sim_velilog_file      = os.path.join(rtl_sim_path, rtl_module_name + '.v')

# Training hyper-parameters
epochs                = 4
mini_batch_size       = 64
# Used for both RealToBinary(frame_modulation_size=...) and
# BinaryToReal(frame_integration_size=...) in the network definition
frame_modulation_size = 15

# dataset
dataset_path = './data/'

# MNIST train / test splits as tensors, stored under dataset_path
dataset_train = torchvision.datasets.MNIST(
    root=dataset_path,
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
dataset_test = torchvision.datasets.MNIST(
    root=dataset_path,
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Mini-batch loaders: only the training loader shuffles
loader_train = torch.utils.data.DataLoader(
    dataset=dataset_train,
    batch_size=mini_batch_size,
    shuffle=True,
    num_workers=2,
)
loader_test = torch.utils.data.DataLoader(
    dataset=dataset_test,
    batch_size=mini_batch_size,
    shuffle=False,
    num_workers=2,
)

# define network
# Layer stack: RealToBinary, three DifferentiableLut layers sized [1024],
# [420] and [70], Reduce to [10], then BinaryToReal. The same
# frame_modulation_size is given to both conversion layers.
net = bb.Sequential([
            bb.RealToBinary(frame_modulation_size=frame_modulation_size),
            bb.DifferentiableLut([1024]),
            bb.DifferentiableLut([420]),
            bb.DifferentiableLut([70]),
            bb.Reduce([10]),
            bb.BinaryToReal(frame_integration_size=frame_modulation_size)
        ])
# presumably one channel of 28x28 MNIST pixels — TODO confirm axis order
net.set_input_shape([1, 28, 28])

# NOTE(review): "binary true" presumably switches the layers to binary
# operation — confirm against the BinaryBrain documentation
net.send_command("binary true")

# Loss, accuracy metric and optimizer used by the training loop
loss      = bb.LossSoftmaxCrossEntropy()
metrics   = bb.MetricsCategoricalAccuracy()
optimizer = bb.OptimizerAdam()

# Hand the network's parameters and gradients to the optimizer
optimizer.set_variables(net.get_parameters(), net.get_gradients())

# load
# NOTE(review): presumably restores a previously saved network from
# data_path when one exists — confirm behavior when nothing was saved yet
bb.load_networks(data_path, net)

def _to_frame_buffers(images, labels):
    """Convert one loader batch into (input, one-hot target) FrameBuffers."""
    inputs = bb.FrameBuffer.from_numpy(np.array(images).astype(np.float32))
    # Row lookup into a 10x10 identity matrix yields one-hot label vectors
    one_hot = np.identity(10)[np.array(labels)].astype(np.float32)
    return inputs, bb.FrameBuffer.from_numpy(one_hot)


# learning
for epoch in range(epochs):
    # training pass over the shuffled training set, with a progress bar
    loss.clear()
    metrics.clear()
    with tqdm(loader_train) as progress:
        for images, labels in progress:
            x_buf, t_buf = _to_frame_buffers(images, labels)

            y_buf = net.forward(x_buf, train=True)

            # The loss returns the gradient buffer that is fed to backward()
            dy_buf = loss.calculate(y_buf, t_buf)
            metrics.calculate(y_buf, t_buf)

            net.backward(dy_buf)
            optimizer.update()

            # Show the running loss / accuracy next to the progress bar
            progress.set_postfix(loss=loss.get(), acc=metrics.get())

    # evaluation pass over the test set (forward only)
    loss.clear()
    metrics.clear()
    for images, labels in loader_test:
        x_buf, t_buf = _to_frame_buffers(images, labels)

        y_buf = net.forward(x_buf, train=False)

        loss.calculate(y_buf, t_buf)
        metrics.calculate(y_buf, t_buf)

    print('epoch[%d] : loss=%f accuracy=%f' % (epoch, loss.get(), metrics.get()))

    # Save the network after every epoch
    bb.save_networks(data_path, net, backups=3)