In [1]:
import os
import random
import numpy as np
import torch
import torchvision
from torchsummary import summary

from fpga_nn_backend.datasets import *
from fpga_nn_backend.training import *
from fpga_nn_backend.evaluation import *
from fpga_nn_backend.models.relu_toy_models import *
from fpga_nn_backend.quantization import *
from fpga_nn_backend.fpga_simple.emulation import *
from fpga_nn_backend.fpga_simple.conversion import *
from fpga_nn_backend.utils import *

In [2]:
print("PyTorch Version:", torch.__version__)
print("Torchvision Version:", torchvision.__version__)
# Detect if we have a GPU available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    print("Using the GPU!")
else:
    print("WARNING: Could not find GPU! Using CPU only")

PyTorch Version: 1.10.0
Torchvision Version: 0.11.1


In [3]:
data_dir = get_rel_pkg_path("dataset/")
weights_dir = get_rel_pkg_path("weights/")
session_dir = get_rel_pkg_path("sessions/")
models_dir = get_rel_pkg_path("models/")

In [4]:
dataset_type = ImageDatasetType.MNIST

In [5]:
orig_datasets = get_img_dataset(data_dir, dataset_type)

In [6]:
datasets = apply_img_transforms(orig_datasets, dataset_type, flatten=True)

In [7]:
dataloaders = get_dataloaders(datasets, 128, 128, num_workers=0)

In [8]:
input_dim = IMG_DATASET_TO_IMG_SIZE_FLAT[dataset_type]
num_classes = IMG_DATASET_TO_NUM_CLASSES[dataset_type]

model = ReLUToyModel(input_dim, num_classes, layer_dims=[256, 128, 64, 32])
model = model.to(device)

In [9]:
model = QuantWrapper(model)

In [10]:
summary(model, (input_dim,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         QuantStub-1                  [-1, 784]               0
            Linear-2                  [-1, 256]         200,704
              ReLU-3                  [-1, 256]               0
            Linear-4                  [-1, 128]          32,768
              ReLU-5                  [-1, 128]               0
            Linear-6                   [-1, 64]           8,192
              ReLU-7                   [-1, 64]               0
            Linear-8                   [-1, 32]           2,048
              ReLU-9                   [-1, 32]               0
           Linear-10                   [-1, 10]             320
     ReLUToyModel-11                   [-1, 10]               0
      DeQuantStub-12                   [-1, 10]               0
Total params: 244,032
Trainable params: 244,032
Non-trainable params: 0
-------------------------------

In [11]:
# model.load_state_dict(torch.load(os.path.join(weights_dir, r"Experiment 11-18-2021 11-04-36 PM\Weights Best.pckl")))

In [12]:
# model.load_state_dict(torch.load(os.path.join(weights_dir, r"Experiment 11-20-2021 06-39-40 PM\Weights Best.pckl")))

In [13]:
model.load_state_dict(torch.load(os.path.join(weights_dir, r"Experiment 11-23-2021 02-52-17 PM\Weights Best.pckl")))

<All keys matched successfully>

In [14]:
criterion = get_loss()
criterion = criterion.to(device)

In [15]:
model.eval()
model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
#torch.quantization.fuse_modules(model.model.layers, [['0', '1'], ['2', '3'], ['4', '5'], ['6', '7']], inplace=True)
model = torch.quantization.prepare(model)
stats = get_dataloader_stats(dataloaders['test'], model, criterion, device)
model_int8 = torch.quantization.convert(model)

100%|██████████████████████████████████████████████████████████████████████████████████| 79/79 [00:03<00:00, 22.40it/s]
  src_bin_begin // dst_bin_width, 0, self.dst_nbins - 1
  src_bin_end // dst_bin_width, 0, self.dst_nbins - 1


In [16]:
print("Accuracy:", stats['acc'])

Accuracy: 0.9259


In [17]:
model_int8

QuantWrapper(
  (model): ReLUToyModel(
    (layers): Sequential(
      (0): QuantizedLinear(in_features=784, out_features=256, scale=0.050842445343732834, zero_point=57, qscheme=torch.per_channel_affine)
      (1): ReLU(inplace=True)
      (2): QuantizedLinear(in_features=256, out_features=128, scale=0.06895451247692108, zero_point=27, qscheme=torch.per_channel_affine)
      (3): ReLU(inplace=True)
      (4): QuantizedLinear(in_features=128, out_features=64, scale=0.10227379202842712, zero_point=27, qscheme=torch.per_channel_affine)
      (5): ReLU(inplace=True)
      (6): QuantizedLinear(in_features=64, out_features=32, scale=0.17031508684158325, zero_point=33, qscheme=torch.per_channel_affine)
      (7): ReLU(inplace=True)
      (8): QuantizedLinear(in_features=32, out_features=10, scale=0.40706461668014526, zero_point=64, qscheme=torch.per_channel_affine)
    )
  )
  (quant): Quantize(scale=tensor([0.0079]), zero_point=tensor([0]), dtype=torch.quint8)
  (dequant): DeQuantize()
)

In [18]:
model_int8.model.layers[0].weight().int_repr()

tensor([[ 24, -36,  55,  ..., -34, -51,  18],
        [ 66, -58,  62,  ...,   5, -62, -66],
        [ 60, -87,  -6,  ...,  66,  17, -56],
        ...,
        [-41, -13,  41,  ...,  39,  57, -25],
        [-32,  66, -31,  ..., -48,   5,  49],
        [ 57, -51,  -4,  ..., -30, -18,  -3]], dtype=torch.int8)

In [19]:
model_int8.model.layers[8].bias()

In [20]:
stats = get_dataloader_stats(dataloaders['test'], model_int8, criterion, device)

100%|██████████████████████████████████████████████████████████████████████████████████| 79/79 [00:02<00:00, 30.11it/s]


In [21]:
print("Accuracy:", stats['acc'])

Accuracy: 0.924


In [22]:
def safe_get_param(param):
    if param is not None:
        return param.int_repr().numpy()
    else:
        return None

In [23]:
converted_nn = ConvertedNN((1, 28, 28))

converted_nn.add_flatten_layer((1, 28, 28), 0, 0)

converted_nn.add_dense_layer((784,), (256,), 0, 0,
    weight=safe_get_param(model_int8.model.layers[0].weight()),
    bias=safe_get_param(model_int8.model.layers[0].bias()))
converted_nn.add_relu_layer((256,), 0, 0)

converted_nn.add_dense_layer((256,), (128,), 0, 0,
    weight=safe_get_param(model_int8.model.layers[2].weight()),
    bias=safe_get_param(model_int8.model.layers[2].bias()))
converted_nn.add_relu_layer((128,), 0, 0)

converted_nn.add_dense_layer((128,), (64,), 0, 0,
    weight=safe_get_param(model_int8.model.layers[4].weight()),
    bias=safe_get_param(model_int8.model.layers[4].bias()))
converted_nn.add_relu_layer((64,), 0, 0)

converted_nn.add_dense_layer((64,), (32,), 0, 0,
    weight=safe_get_param(model_int8.model.layers[6].weight()),
    bias=safe_get_param(model_int8.model.layers[6].bias()))
converted_nn.add_relu_layer((32,), 0, 0)

converted_nn.add_dense_layer((32,), (10,), 0, 0,
    weight=safe_get_param(model_int8.model.layers[8].weight()),
    bias=safe_get_param(model_int8.model.layers[8].bias()))

converted_nn.add_output_layer((10,), 0, 0)

In [24]:
model_int8.model.layers[8]

QuantizedLinear(in_features=32, out_features=10, scale=0.40706461668014526, zero_point=64, qscheme=torch.per_channel_affine)

In [25]:
converted_nn.finalize()

In [26]:
converted_nn.get_layer_info()

{'layers': [{'layer_type': <ConverterLayerType.FLATTEN: 5>,
   'input_shapes': ((1, 28, 28),),
   'output_shape': (784,),
   'output_size': 784,
   'stack_input_indices': (0,),
   'stack_output_index': 0,
   'parameters': None,
   'metadata': None},
  {'layer_type': <ConverterLayerType.DENSE: 0>,
   'input_shapes': ((784,),),
   'output_shape': (256,),
   'output_size': 256,
   'stack_input_indices': (0,),
   'stack_output_index': 0,
   'parameters': {'weight': 0},
   'metadata': {'has_bias': False}},
  {'layer_type': <ConverterLayerType.RELU: 2>,
   'input_shapes': ((256,),),
   'output_shape': (256,),
   'output_size': 256,
   'stack_input_indices': (0,),
   'stack_output_index': 0,
   'parameters': None,
   'metadata': None},
  {'layer_type': <ConverterLayerType.DENSE: 0>,
   'input_shapes': ((256,),),
   'output_shape': (128,),
   'output_size': 128,
   'stack_input_indices': (0,),
   'stack_output_index': 0,
   'parameters': {'weight': 0},
   'metadata': {'has_bias': False}},
  {'

In [27]:
with open("test.coe", 'w') as f:
    f.write(converted_nn.generate_parameter_coe())

In [28]:
emulator = FPGAEmulator(converted_nn, bram_reserved_size=303000)

In [29]:
emulator.exec_info

{'input_shape': (1, 28, 28),
 'inital_input_addr': 0,
 'layers': [{'layer_type': <LayerType.DENSE: 0>,
   'config': {'has_bias': None,
    'input_base_addr': 0,
    'weight_base_addr': 0,
    'bias_base_addr': 0,
    'output_base_addr': 784,
    'm_size': 256,
    'chw_size': 784}},
  {'layer_type': <LayerType.MOVE: 5>,
   'config': {'input_base_addr': 784, 'output_base_addr': 0, 'n_size': 256}},
  {'layer_type': <LayerType.RELU: 2>,
   'config': {'input_base_addr': 0, 'output_base_addr': 0, 'n_size': 256}},
  {'layer_type': <LayerType.DENSE: 0>,
   'config': {'has_bias': None,
    'input_base_addr': 0,
    'weight_base_addr': 0,
    'bias_base_addr': 0,
    'output_base_addr': 256,
    'm_size': 128,
    'chw_size': 256}},
  {'layer_type': <LayerType.MOVE: 5>,
   'config': {'input_base_addr': 256, 'output_base_addr': 0, 'n_size': 128}},
  {'layer_type': <LayerType.RELU: 2>,
   'config': {'input_base_addr': 0, 'output_base_addr': 0, 'n_size': 128}},
  {'layer_type': <LayerType.DENSE: 0

In [30]:
for imgs, labels in dataloaders['test']:
    break
data = (imgs[0] * 255).numpy()
data = (data/2).astype(np.int8)
print(data)

[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0  42  92  79  75  30  18   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0 111 127 127 127
 127 120  99  99  99  99  99  99  99  99  85  26   

In [31]:
parameters = converted_nn.parameters_info['parameters']
print([p.shape for p in parameters])

def dense_no_bias(w, i):
    pass

def relu(i):
    return np.maximum(0, i)

[(256, 784), (128, 256), (64, 128), (32, 64), (10, 32)]


In [32]:
inputs_per_layer = {}
outputs_per_layer = {}
def get_output(name):
    def hook(model, input, output):
        inputs_per_layer[name] = input
        outputs_per_layer[name] = output.detach()
    return hook

hooks = {}
for name, module in model_int8.named_modules():
    hooks[name] = module.register_forward_hook(get_output(name))

In [33]:
batch = imgs[None, 0, :]
model_out = model_int8(batch)

In [34]:
outputs_per_layer.keys()

dict_keys(['quant', 'model.layers.0', 'model.layers.1', 'model.layers.2', 'model.layers.3', 'model.layers.4', 'model.layers.5', 'model.layers.6', 'model.layers.7', 'model.layers.8', 'model.layers', 'model', 'dequant', ''])

In [35]:
inputs_per_layer['model.layers.0'][0].int_repr().numpy()

array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0

In [36]:
outputs_per_layer['model.layers.0'].int_repr()

tensor([[57, 65, 57, 57, 59, 67, 65, 72, 57, 71, 58, 57, 64, 68, 58, 57, 62, 58,
         57, 57, 62, 63, 57, 79, 57, 73, 60, 63, 57, 57, 62, 57, 58, 57, 73, 57,
         57, 57, 57, 57, 80, 71, 77, 70, 65, 71, 57, 57, 57, 57, 73, 73, 57, 57,
         62, 64, 61, 61, 71, 57, 75, 66, 57, 58, 59, 58, 57, 57, 58, 57, 61, 57,
         77, 58, 59, 70, 67, 58, 62, 57, 61, 72, 57, 72, 68, 58, 68, 64, 63, 65,
         64, 79, 57, 57, 67, 57, 64, 57, 77, 66, 62, 74, 57, 66, 58, 66, 57, 70,
         59, 69, 57, 74, 78, 71, 58, 57, 75, 61, 57, 57, 57, 65, 62, 76, 59, 58,
         62, 59, 67, 72, 57, 57, 71, 67, 58, 63, 76, 62, 60, 61, 57, 59, 60, 57,
         63, 65, 74, 64, 75, 57, 63, 60, 57, 67, 58, 61, 63, 70, 58, 75, 57, 68,
         60, 58, 66, 57, 75, 57, 57, 61, 62, 57, 63, 61, 59, 57, 57, 58, 57, 72,
         61, 57, 57, 59, 72, 67, 59, 59, 57, 57, 65, 74, 65, 62, 57, 67, 60, 57,
         60, 57, 57, 58, 57, 57, 72, 57, 68, 57, 65, 63, 63, 59, 65, 57, 57, 60,
         57, 73, 57, 57, 58,

In [37]:
outputs_per_layer['model.layers.8'].int_repr()

tensor([[70, 52, 62, 75, 48, 68, 41, 90, 64, 75]], dtype=torch.uint8)

In [38]:
model_int8.model.layers[0]

QuantizedLinear(in_features=784, out_features=256, scale=0.050842445343732834, zero_point=57, qscheme=torch.per_channel_affine)

In [39]:
data

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   

In [45]:
model_int8.model.layers[0].weight().int_repr()

tensor([[ 24, -36,  55,  ..., -34, -51,  18],
        [ 66, -58,  62,  ...,   5, -62, -66],
        [ 60, -87,  -6,  ...,  66,  17, -56],
        ...,
        [-41, -13,  41,  ...,  39,  57, -25],
        [-32,  66, -31,  ..., -48,   5,  49],
        [ 57, -51,  -4,  ..., -30, -18,  -3]], dtype=torch.int8)

In [41]:
parameters[0]

array([[ 24, -36,  55, ..., -34, -51,  18],
       [ 66, -58,  62, ...,   5, -62, -66],
       [ 60, -87,  -6, ...,  66,  17, -56],
       ...,
       [-41, -13,  41, ...,  39,  57, -25],
       [-32,  66, -31, ..., -48,   5,  49],
       [ 57, -51,  -4, ..., -30, -18,  -3]], dtype=int8)

In [42]:
(parameters[0] @ data)

array([  19,  105, -115,  -22,   93,  126,  121,   68, -115,  -82,  -93,
         25,   34,  -19,  118,   96,  -87,  -24,  -23,   33,  -21,   33,
         73,   62,   95,   -9,   80,   47,  -59,   21,   82,  -82,  -72,
         68,   23,   65,  -99,  -37,  -30,   32,   14,  113,   33,  -18,
        -33,    7,  -88,  -23,   26,   40,   44,  -21,  102, -110,  109,
         64,   39,  109,  -93,  -59,   12,  -21,  -44,  -67, -111,  106,
        -85,    3,   85,   10,  -69,  -56,  -95,   98,  119, -122,   90,
        -62,  111,  -53, -114,  -68,    2,   30,   51,  114,  103,  110,
        -82,  -46,   18,    4,  120,  -36,  -72,   61,   39, -125,  -90,
         91,   37,   -2,  -49,  -95,  -87,  -87,  126,    5,    9,   44,
         87,   -4,  -26,  107,   42,  -28,  -25,  -12, -103,  -53,  -18,
         61,   66,   25,   77,   68,    9,  -65,   63,   84,   54,   20,
         97,   98,   -3,  -93,   31, -105,  110,  -99,   -9,   68,   -3,
        -93,   37,   56,  -64,  -19, -117, -122,  -

In [43]:
emulator.execute(data)

{'has_bias': None, 'input_base_addr': 0, 'weight_base_addr': 0, 'bias_base_addr': 0, 'output_base_addr': 784, 'm_size': 256, 'chw_size': 784}


  o_out[i] = np.int8(w_in[i] * i_in[i] + b_in[i])


{'input_base_addr': 784, 'output_base_addr': 0, 'n_size': 256}
[  19  105 -115  -22   93  126  121   68 -115  -82  -93   25   34  -19
  118   96  -87  -24  -23   33  -21   33   73   62   95   -9   80   47
  -59   21   82  -82  -72   68   23   65  -99  -37  -30   32   14  113
   33  -18  -33    7  -88  -23   26   40   44  -21  102 -110  109   64
   39  109  -93  -59   12  -21  -44  -67 -111  106  -85    3   85   10
  -69  -56  -95   98  119 -122   90  -62  111  -53 -114  -68    2   30
   51  114  103  110  -82  -46   18    4  120  -36  -72   61   39 -125
  -90   91   37   -2  -49  -95  -87  -87  126    5    9   44   87   -4
  -26  107   42  -28  -25  -12 -103  -53  -18   61   66   25   77   68
    9  -65   63   84   54   20   97   98   -3  -93   31 -105  110  -99
   -9   68   -3  -93   37   56  -64  -19 -117 -122  -68   79   55  -55
  -16  -37  -45   50    0   31   34  105  104    3  123 -127   86 -120
 -122  -14   50   37  -76   72 -125 -120  -15 -111   90   26 -108   54
   82  115   7

ValueError: 