In [1]:
import os
import random
import numpy as np
import torch
import torchvision
from torchsummary import summary

from fpga_nn_backend.datasets import *
from fpga_nn_backend.training import *
from fpga_nn_backend.evaluation import *
from fpga_nn_backend.models.relu_toy_models import *
from fpga_nn_backend.quantization import *
from fpga_nn_backend.utils import *

In [2]:
# Report library versions so results can be tied to a specific environment.
print("PyTorch Version:", torch.__version__)
print("Torchvision Version:", torchvision.__version__)

# Probe for CUDA once and derive both the device and the status message from it.
gpu_available = torch.cuda.is_available()
device = torch.device("cuda" if gpu_available else "cpu")
print("Using the GPU!" if gpu_available else "WARNING: Could not find GPU! Using CPU only")

PyTorch Version: 1.10.0
Torchvision Version: 0.11.1


In [3]:
# Resolve the dataset, weights and session folders via get_rel_pkg_path, in that order.
data_dir, weights_dir, session_dir = (
    get_rel_pkg_path(subdir) for subdir in ("dataset/", "weights/", "sessions/")
)

In [4]:
# Select the image dataset for this run; the same value is passed to the dataset
# loader and transform helper and keys the input-size / class-count lookups below.
dataset_type = ImageDatasetType.MNIST

In [5]:
# Obtain the untransformed dataset(s) for `dataset_type`, rooted at `data_dir`.
# NOTE(review): whether missing data is downloaded here depends on get_img_dataset — confirm.
orig_datasets = get_img_dataset(data_dir, dataset_type)

In [6]:
# Apply the transforms for `dataset_type`. flatten=True presumably turns every image
# into a 1-D vector to match the fully connected model's input — TODO confirm.
datasets = apply_img_transforms(orig_datasets, dataset_type, flatten=True)

In [7]:
# One batch size is used for both positional size arguments (presumably the train
# and evaluation batch sizes — confirm against get_dataloaders).
batch_size = 128
dataloaders = get_dataloaders(datasets, batch_size, batch_size, num_workers=0)

In [8]:
# Look up the flattened input size and the class count for the selected dataset.
input_dim = IMG_DATASET_TO_IMG_SIZE_FLAT[dataset_type]
num_classes = IMG_DATASET_TO_NUM_CLASSES[dataset_type]

# Build the ReLU toy model with the given hidden layer widths and place it on `device`.
hidden_dims = [256, 128, 64, 32]
model = ReLUToyModel(input_dim, num_classes, layer_dims=hidden_dims).to(device)

In [9]:
# Print a per-layer overview (output shapes and parameter counts) of the float model
# for a flat input of length `input_dim`.
summary(model, (input_dim,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 256]         200,704
              ReLU-2                  [-1, 256]               0
            Linear-3                  [-1, 128]          32,768
              ReLU-4                  [-1, 128]               0
            Linear-5                   [-1, 64]           8,192
              ReLU-6                   [-1, 64]               0
            Linear-7                   [-1, 32]           2,048
              ReLU-8                   [-1, 32]               0
            Linear-9                   [-1, 10]             330
Total params: 244,042
Trainable params: 244,042
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 0.93
Estimated Total Size (MB): 0.94
-------------------------------------------

In [10]:
# Wrap the float model for quantization. The guard keeps this cell idempotent:
# re-running it would otherwise wrap an already wrapped model a second time, after
# which the checkpoint keys and the `model.model.layers` accesses in later cells
# would no longer line up.
if isinstance(model, ReLUToyModel):
    model = QuantWrapper(model)

In [11]:
# Summarize again after wrapping; the listing should now show the quant/dequant
# stubs around the original layers with an unchanged parameter count.
summary(model, (input_dim,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         QuantStub-1                  [-1, 784]               0
            Linear-2                  [-1, 256]         200,704
              ReLU-3                  [-1, 256]               0
            Linear-4                  [-1, 128]          32,768
              ReLU-5                  [-1, 128]               0
            Linear-6                   [-1, 64]           8,192
              ReLU-7                   [-1, 64]               0
            Linear-8                   [-1, 32]           2,048
              ReLU-9                   [-1, 32]               0
           Linear-10                   [-1, 10]             330
     ReLUToyModel-11                   [-1, 10]               0
      DeQuantStub-12                   [-1, 10]               0
Total params: 244,042
Trainable params: 244,042
Non-trainable params: 0
-------------------------------

In [12]:
# model.load_state_dict(torch.load(os.path.join(weights_dir, r"Experiment 11-18-2021 11-04-36 PM\Weights Best.pckl")))

In [13]:
# Restore the best weights of the selected experiment into the wrapped model.
# The path is assembled from separate components so it resolves on any OS (a
# backslash inside a file name only acts as a separator on Windows), and
# map_location="cpu" lets a checkpoint saved on a GPU machine load on a CPU-only
# one; load_state_dict then copies the tensors onto the model's own device.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
checkpoint_path = os.path.join(
    weights_dir, "Experiment 11-20-2021 06-39-40 PM", "Weights Best.pckl"
)
model.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))

<All keys matched successfully>

In [14]:
# Create the loss object and move it to the active device in a single expression.
criterion = get_loss().to(device)

In [15]:
# Display the first layer of the wrapped model as a quick check of the architecture
# that the checkpoint was loaded into.
model.model.layers[0]

Linear(in_features=784, out_features=256, bias=False)

In [19]:
# Post-training static quantization: attach observers, calibrate, convert to int8.
# The 'fbgemm' backend only provides CPU kernels, so the model and the calibration
# batches are kept on the CPU regardless of the `device` chosen for float work.
quant_device = torch.device("cpu")
model_fp32 = model.to(quant_device)  # same object as `model`; .to() moves it in place
model_fp32.eval()  # calibrate and convert in inference mode
model_fp32.qconfig = torch.quantization.get_default_qconfig('fbgemm')
# prepare() returns an observed copy by default (inplace=False).
model_fp32_prepared = torch.quantization.prepare(model_fp32)
# Feeding data through the prepared model lets the observers record activation
# ranges; the returned stats therefore describe the float model with observers.
# The last argument is assumed to be the device batches are moved to — confirm
# against get_dataloader_stats.
# NOTE(review): calibration uses the same test split that is later used to report
# accuracy; a separate calibration subset would avoid tuning on the test data.
stats = get_dataloader_stats(dataloaders['test'], model_fp32_prepared, criterion, quant_device)
model_int8 = torch.quantization.convert(model_fp32_prepared)

100%|██████████████████████████████████████████████████████████████████████████████████| 79/79 [00:03<00:00, 22.15it/s]
  src_bin_begin // dst_bin_width, 0, self.dst_nbins - 1
  src_bin_end // dst_bin_width, 0, self.dst_nbins - 1


In [17]:
# Report the 'acc' entry of the most recently computed stats.
# NOTE(review): which run `stats` refers to depends on cell execution order.
print("Accuracy:", stats['acc'])

Accuracy: 0.9241


In [31]:
# Show the raw integer representation of the quantized weights of layer index 8
# (the last Linear layer according to the model summary).
model_int8.model.layers[8].weight().int_repr()

tensor([[  80,   -5,   33,  115,   21,  -38,   72,  -13, -106,   18,   19,   12,
           53,  -36,   48,   25,   30,  -99,  -32,   59,   37,  -42, -103,  -62,
         -128,   27,   -7,   99,   71,   -8,  -42,   75],
        [ -56,    2,  -75,   -3,   24,  -33,    4,   26,   81,   69,   30,  -86,
          -86,  -92,  -42,  -18,   14,   26,  -36,  -30,  -69,   34,   51,   75,
          127,  -11,   27,  -68,  -53,  -29,   -6,  -61],
        [ -30,   63,   71,   59,   38,  -38,    9,   43,   69,  -19,   12,  -47,
          -60,  -48,   22,  -94,  127,  -87, -111,  102,  -66,  -23,  -82,   22,
           94,   -2,   34,   60,   20,  -10, -108,  -16],
        [ -38,    4,  -70, -109,  127,  -41,   40,    3,   76,  -40,  -44,  -65,
           33, -104,  103,   48,    6,  -44,    8,   74,   52,   84,   28,  -62,
           12,  -19,  -27,    0,  -63,  -19,  -39,   32],
        [ -67,    7,   76,   10, -128,   -5,   26,   18,  -15,    2,  -27,   11,
            7,   87,   -9,  -17,  -25, 

In [None]:
# Evaluate the converted int8 model on the test split. Quantized kernels run on the
# CPU only, so the evaluation device is pinned to the CPU even when `device` points
# at a GPU.
stats = get_dataloader_stats(dataloaders['test'], model_int8, criterion, torch.device("cpu"))

In [None]:
# Print every entry of the most recently computed evaluation stats.
print(stats)

In [36]:
# Persist the quantized weights alongside the other checkpoints under a descriptive
# name instead of a placeholder file in the current working directory.
int8_weights_path = os.path.join(weights_dir, "relu_toy_model_int8_state_dict.pt")
torch.save(model_int8.state_dict(), int8_weights_path)