# Quantized Deployment and Throughput/Accuracy Testing
This notebook deploys the quantized model on an FPGA and performs throughput and accuracy testing.
If the desired deployment is on a PYNQ board, this notebook should be run from the PYNQ host.

In [1]:
from finn.core.datatype import DataType
from driver_base import FINNExampleOverlay
import numpy as np
import proc_for_infer as pfi
from utils_pynq import load_split_np_data

In [26]:
#set up model info
# dictionary describing the I/O of the FINN-generated accelerator
io_shape_dict = {
    # FINN DataType for input and output tensors
    "idt" : DataType.UINT4,
    "odt" : DataType.UINT4,
    # shapes for input and output tensors (NHWC layout)
    "ishape_normal" : (1, 14),
    "oshape_normal" : (1, 1),
    # folded / packed shapes below depend on idt/odt and input/output
    # PE/SIMD parallelization settings -- these are calculated by the
    # FINN compiler.
    "ishape_folded" : (1, 1, 14),
    "oshape_folded" : (1, 1, 1),
    "ishape_packed" : (1, 1, 7),
    "oshape_packed" : (1, 1, 1)
}
platform = "zynq-iodma"
bitfile = "output_final/deploy/bitfile/finn-accel.bit"
input_file = "data/X_train.npy"
output_comparison = "data/Y_train.npy"
runtime_weights = "runtime_weights/"
batch_size = 500

In [27]:
#create accelerator
accel = FINNExampleOverlay(bitfile_name=bitfile, platform=platform, 
                          io_shape_dict=io_shape_dict, batch_size=batch_size, fclk_mhz=4,
                          runtime_weight_dir=runtime_weights)

### Throughput Test
Run a basic throughput test and print the results

In [28]:
res = accel.throughput_test()
print(res)

{'runtime[ms]': 55.40013313293457, 'throughput[images/s]': 9025.249069352098, 'DRAM_in_bandwidth[Mb/s]': 0.063176743485464676, 'DRAM_out_bandwidth[Mb/s]': 0.0090252490693520978, 'fclk[mhz]': 3.99996, 'batch_size': 500, 'fold_input[ms]': 5.364418029785156e-05, 'pack_input[ms]': 2.497321367263794, 'copy_input_data_to_device[ms]': 0.0001876354217529297, 'copy_output_data_from_device[ms]': 7.295608520507812e-05, 'unpack_output[ms]': 0.09534645080566406, 'unfold_output[ms]': 4.792213439941406e-05}


### Accuracy Test
Perform inference on testing dataset and compare to actual values.

In [29]:
#accel.batch_size(229538)
print(accel.ishape_normal)

(500, 14)


In [30]:
datasets = load_split_np_data()
print(datasets)

X_train shape: (229538, 14)
X_val shape: (12752, 14)
X_test shape: (12752, 14)
Y_train shape: (229538, 1)
Y_val shape: (12752, 1)
Y_test shape: (12752, 1)
Using saved split data
[array([[  5.80000000e+01,   2.00000000e+00,   3.69370341e+00, ...,
          1.64524792e+03,   1.56832642e+03,   9.73000366e+02],
       [  7.30000000e+01,   3.00000000e+00,   1.63631487e+00, ...,
          1.09457141e+03,   8.94265137e+02,   7.37251587e+02],
       [  7.40000000e+01,   2.00000000e+00,   1.15914593e+01, ...,
          4.53916113e+03,   1.46776880e+03,   7.66278015e+02],
       ..., 
       [  6.40000000e+01,   2.00000000e+00,   9.21862221e+00, ...,
          1.73535828e+03,   1.19223145e+03,   6.71937500e+02],
       [  7.10000000e+01,   2.00000000e+00,   7.65338302e-01, ...,
          8.91962585e+02,   1.62368567e+03,   8.88209412e+02],
       [  9.10000000e+01,   2.00000000e+00,   8.17640972e+00, ...,
          2.95410669e+03,   1.74744006e+03,   8.36169189e+02]]), array([[  5.70000000e+01, 

In [None]:
inps = datasets[1]
exp_out = datasets[4]
valid_size = len(exp_out)
num_batches = int(valid_size/batch_size)
running_error_square = 0
for i in range(0, num_batches):
    batch = inps[(i*batch_size):((i+1)*batch_size)]
    batch_exp_out = exp_out[(i*batch_size):((i+1)*batch_size)]
    #print(batch.shape)
    proc_batch = pfi.preproc(batch)
    #proc_batch = np.flip(proc_batch, axis=1)
    batch_out = accel.execute(proc_batch)
    batch_proc_out = pfi.postproc(batch_out)
    batch_errs = batch_proc_out-batch_exp_out
    #print("exp: " + str(batch_exp_out) + " act: " + str(batch_proc_out))
    batch_sq_errs = batch_errs*batch_errs
    running_error_square += sum(batch_sq_errs)
    #batch = valid[(i*batch_size):((i+1)*batch_size)]
    #inp = batch[0]
    #inp = FACILE_preproc(inp.float())
    #inp = inp.numpy()
    #inp_dict = {in_tensor : inp}
    #exp_out = batch[1].numpy()
    #out_dict = exe_onnx(model, inp_dict)
    #out = out_dict[out_tensor]
    #out = tensor(out,dtype=float32)
    #out = FACILE_postproc(out)
    #print(out)
print("MSE: " + str(running_error_square / (num_batches * batch_size)))

In [8]:
npinfs = np.asarray(infs)
npinfs = npinfs.reshape((12752,1))
print(npinfs.shape)
print(exp_out.shape)

NameError: name 'infs' is not defined

In [55]:
err = npinfs - exp_out
print(exp_out)
running_err = 0.0
for val in err:
    #print(val)
    se = val**2
    running_err += se
mse = running_err / 229538
print(mse)
#print(err)

[[ 132.65643311]
 [   0.        ]
 [   0.        ]
 ..., 
 [   0.        ]
 [   0.        ]
 [   0.        ]]
[ 10525.77484753]


In [56]:
mae = mse**(1/2)
print(mae)
print(mae**2)

[ 102.59519895]
[ 10525.77484753]
