In [1]:
import os
import pprint
import random

import numpy as np
import torch
import torchvision
from torchsummary import summary

from fpga_nn_backend.datasets import *
from fpga_nn_backend.training import *
from fpga_nn_backend.evaluation import *
from fpga_nn_backend.models.relu_toy_models import *
from fpga_nn_backend.quantization import *
from fpga_nn_backend.fpga.emulation import *
from fpga_nn_backend.fpga.conversion import *
from fpga_nn_backend.utils import *

In [2]:
# Report the library versions so results can be tied to a specific environment.
print("PyTorch Version:", torch.__version__)
print("Torchvision Version:", torchvision.__version__)

# Prefer CUDA when it is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type != "cuda":
    print("WARNING: Could not find GPU! Using CPU only")
else:
    print("Using the GPU!")

PyTorch Version: 1.10.0
Torchvision Version: 0.11.1


In [3]:
# Resolve the package-relative working directories in one pass
# (same sub-paths, same call order as four separate assignments).
data_dir, weights_dir, session_dir, models_dir = (
    get_rel_pkg_path(subdir)
    for subdir in ("dataset/", "weights/", "sessions/", "models/")
)

In [4]:
# Dataset selector used by the loading, transform and model-sizing cells below.
dataset_type = ImageDatasetType.MNIST

In [5]:
# Fetch the untransformed dataset(s) for `dataset_type` from `data_dir`.
# The exact return structure is defined by get_img_dataset.
orig_datasets = get_img_dataset(data_dir, dataset_type)

In [6]:
# Apply the dataset-specific transforms to the raw data.
# flatten=True presumably turns each image into a 1-D vector so it matches the
# fully connected model's flat input size — confirm in apply_img_transforms.
datasets = apply_img_transforms(orig_datasets, dataset_type, flatten=True)

In [7]:
# Build the data loaders; later cells index the result by split name ('test').
# NOTE(review): the two positional 128s are presumably the train/test batch
# sizes — confirm against get_dataloaders' signature.
dataloaders = get_dataloaders(datasets, 128, 128, num_workers=0)

In [8]:
# Size the network from the dataset lookup tables: flat image length in,
# number of classes out.
input_dim = IMG_DATASET_TO_IMG_SIZE_FLAT[dataset_type]
num_classes = IMG_DATASET_TO_NUM_CLASSES[dataset_type]

# Build the ReLU toy model with four hidden layers and place it on the
# selected device in one expression.
model = ReLUToyModel(
    input_dim,
    num_classes,
    layer_dims=[256, 128, 64, 32],
).to(device)

In [9]:
# Wrap the float model in QuantWrapper; the summary output further down shows
# a QuantStub before and a DeQuantStub after the ReLUToyModel.
# NOTE: this rebinds `model` to a wrapper of itself, so re-running this cell
# without re-running the model-construction cell wraps the model a second time.
model = QuantWrapper(model)

In [10]:
# Print a per-layer summary of the wrapped model for a flat input of length input_dim.
summary(model, (input_dim,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         QuantStub-1                  [-1, 784]               0
            Linear-2                  [-1, 256]         200,704
              ReLU-3                  [-1, 256]               0
            Linear-4                  [-1, 128]          32,768
              ReLU-5                  [-1, 128]               0
            Linear-6                   [-1, 64]           8,192
              ReLU-7                   [-1, 64]               0
            Linear-8                   [-1, 32]           2,048
              ReLU-9                   [-1, 32]               0
           Linear-10                   [-1, 10]             330
     ReLUToyModel-11                   [-1, 10]               0
      DeQuantStub-12                   [-1, 10]               0
Total params: 244,042
Trainable params: 244,042
Non-trainable params: 0
-------------------------------

In [11]:
# Alternative checkpoint from an earlier experiment (by timestamp), kept for reference:
# model.load_state_dict(torch.load(os.path.join(weights_dir, r"Experiment 11-18-2021 11-04-36 PM\Weights Best.pckl")))

In [12]:
# Load the best weights of the chosen experiment into the wrapped model.
# The path is assembled from separate components instead of embedding a "\"
# separator in one string, so it resolves on POSIX as well as on Windows
# (on Windows the resulting path is unchanged).
best_weights_path = os.path.join(
    weights_dir, "Experiment 11-20-2021 06-39-40 PM", "Weights Best.pckl"
)
# map_location lets a checkpoint saved from a CUDA run be loaded on a
# CPU-only machine; without it torch.load tries to restore tensors onto the
# device they were saved from.
# The call is left as the cell's last expression so the key-matching report
# is still displayed.
model.load_state_dict(torch.load(best_weights_path, map_location=device))

<All keys matched successfully>

In [13]:
# Create the loss function and place it on the same device as the model.
criterion = get_loss().to(device)

In [14]:
# Post-training static quantization of the wrapped model.
# The statement order matters: eval mode -> qconfig -> prepare -> data pass -> convert.
model.eval()
# Default quantization configuration for the 'fbgemm' backend.
# NOTE(review): PyTorch's quantized backends target CPU execution, while
# `device` may be CUDA — confirm this cell behaves as intended on a GPU machine.
model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
#torch.quantization.fuse_modules(model.model.layers, [['0', '1'], ['2', '3'], ['4', '5'], ['6', '7']], inplace=True)
# prepare() attaches observers according to the qconfig; `model` is rebound to
# the prepared module.
model = torch.quantization.prepare(model)
# This is the only pass over data between prepare() and convert(), so it
# presumably serves as the calibration run in addition to producing the
# accuracy printed in the next cell — confirm in get_dataloader_stats.
# NOTE(review): calibration therefore uses the test split.
stats = get_dataloader_stats(dataloaders['test'], model, criterion, device)
# Convert the observed model into its quantized counterpart.
model_int8 = torch.quantization.convert(model)

100%|██████████████████████████████████████████████████████████████████████████████████| 79/79 [00:06<00:00, 12.55it/s]
  src_bin_begin // dst_bin_width, 0, self.dst_nbins - 1
  src_bin_end // dst_bin_width, 0, self.dst_nbins - 1


In [15]:
# Accuracy from the pass in the preceding cell, i.e. measured on the prepared
# (observer-instrumented, not yet converted) model.
print("Accuracy:", stats['acc'])

Accuracy: 0.9241


In [16]:
# Display the converted module tree, including each quantized layer's scale and zero point.
model_int8

QuantWrapper(
  (model): ReLUToyModel(
    (layers): Sequential(
      (0): QuantizedLinear(in_features=784, out_features=256, scale=0.051407504826784134, zero_point=42, qscheme=torch.per_channel_affine)
      (1): ReLU(inplace=True)
      (2): QuantizedLinear(in_features=256, out_features=128, scale=0.06380598247051239, zero_point=30, qscheme=torch.per_channel_affine)
      (3): ReLU(inplace=True)
      (4): QuantizedLinear(in_features=128, out_features=64, scale=0.11453789472579956, zero_point=25, qscheme=torch.per_channel_affine)
      (5): ReLU(inplace=True)
      (6): QuantizedLinear(in_features=64, out_features=32, scale=0.21549321711063385, zero_point=27, qscheme=torch.per_channel_affine)
      (7): ReLU(inplace=True)
      (8): QuantizedLinear(in_features=32, out_features=10, scale=0.3798845708370209, zero_point=69, qscheme=torch.per_channel_affine)
    )
  )
  (quant): Quantize(scale=tensor([0.0079]), zero_point=tensor([0]), dtype=torch.quint8)
  (dequant): DeQuantize()
)

In [17]:
# Inspect the bias of the first quantized layer. The recorded cell shows no
# output, which is consistent with the summary above listing 200,704
# (= 784 * 256) parameters for that layer, i.e. weights only.
model_int8.model.layers[0].bias()

In [18]:
# Evaluate the converted model on the test loader.
# NOTE: this overwrites `stats` from the earlier pre-conversion pass.
stats = get_dataloader_stats(dataloaders['test'], model_int8, criterion, device)

100%|██████████████████████████████████████████████████████████████████████████████████| 79/79 [00:03<00:00, 20.88it/s]


In [19]:
# Accuracy of the converted model (from the evaluation in the preceding cell).
print("Accuracy:", stats['acc'])

Accuracy: 0.9206


In [20]:
# Build the FPGA-side network description from the quantized PyTorch model.
# The dense layers sit at the even positions of model_int8.model.layers, with a
# ReLU after every dense layer except the last. Layer shapes are read from the
# exported weight arrays instead of being repeated by hand, so they cannot
# drift from the model definition.
#
# The two positional `0, 0` arguments are presumably the stack input/output
# indices reported by get_execution_info() — confirm against ConvertedNN.
input_img_shape = (1, 28, 28)
dense_layer_indices = (0, 2, 4, 6, 8)

converted_nn = ConvertedNN(input_img_shape)

converted_nn.add_flatten_layer(input_img_shape, 0, 0)

for position, layer_idx in enumerate(dense_layer_indices):
    # Raw integer representation of the quantized weights, shape (out, in).
    int_weight = model_int8.model.layers[layer_idx].weight().int_repr().numpy()
    out_features, in_features = int_weight.shape

    # NOTE(review): every bias is exported as zeros. The model summary lists
    # 330 parameters (32 * 10 + 10) for the final linear layer, so that layer
    # appears to carry a bias that is dropped here. The per-layer scales and
    # zero points are not passed either. Confirm this is intended.
    converted_nn.add_dense_layer((in_features,), (out_features,), 0, 0,
        weight=int_weight,
        bias=np.zeros(out_features, dtype=np.int8))

    # No activation after the output layer.
    if position < len(dense_layer_indices) - 1:
        converted_nn.add_relu_layer((out_features,), (out_features,), 0, 0)

In [21]:
# Pretty-print the layer-by-layer execution description of the converted network.
# pprint is imported in the notebook's top import cell rather than mid-notebook.
pprint.pprint(converted_nn.get_execution_info())

{'layers': [{'input_shape': (1, 28, 28),
             'layer_type': <LayerType.FLATTEN: 5>,
             'output_shape': (784,),
             'parameters': None,
             'stack_input_indices': (0,),
             'stack_output_index': 0},
            {'input_shape': (784,),
             'layer_type': <LayerType.DENSE: 0>,
             'output_shape': (256,),
             'parameters': {'bias': 0, 'weight': 0},
             'stack_input_indices': (0,),
             'stack_output_index': 0},
            {'input_shape': (256,),
             'layer_type': <LayerType.RELU: 2>,
             'output_shape': (256,),
             'parameters': None,
             'stack_input_indices': (0,),
             'stack_output_index': 0},
            {'input_shape': (256,),
             'layer_type': <LayerType.DENSE: 0>,
             'output_shape': (128,),
             'parameters': {'bias': 0, 'weight': 0},
             'stack_input_indices': (0,),
             'stack_output_index': 0},
          

In [22]:
# Generate the parameter COE contents for a single bank; the result is
# written to disk in the next cell.
coe_data = converted_nn.generate_parameter_coe(num_banks=1)

In [23]:
# Persist the generated COE contents to test.coe in the current working directory.
with open("test.coe", mode="w") as coe_file:
    coe_file.write(coe_data)