# Quantized Model Cleanup
This notebook takes the finn-onnx FACILE model exported by the quant_train notebook and cleans it up. This notebook stops before converting to hls layers.

### Load in FINN and transform

In [8]:
import onnx
from finn.util.test import get_test_model_trained
import brevitas.onnx as bo
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs
from finn.transformation.infer_datatypes import InferDataTypes

In [14]:
#load and tidy up brevitas export
model = ModelWrapper("quant_models/facileV2_4b_1.onnx")
model = model.transform(InferShapes())
model = model.transform(FoldConstants())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(InferDataTypes())
model = model.transform(RemoveStaticGraphInputs())
#model.save("quant_models/facileV2_4b_500_tidy.onnx")

In [15]:
#print input and output tensors and data types/shapes
from finn.core.datatype import DataType

in_tensor = model.graph.input[0].name
out_tensor = model.graph.output[0].name
print("Input tensor name: %s" % in_tensor)
print("Output tensor name: %s" % out_tensor)
in_shape = model.get_tensor_shape(in_tensor)
out_shape = model.get_tensor_shape(out_tensor)
print("input shape: " + str(in_shape))
print("out shape: " + str(out_shape))
model.set_tensor_datatype(in_tensor, DataType.UINT4)
model.set_tensor_datatype(out_tensor, DataType.UINT4)
in_dtype = model.get_tensor_datatype(in_tensor)
out_dtype = model.get_tensor_datatype(out_tensor)
print("input dtype: " + str(in_dtype))
print("out dtype: " + str(out_dtype))
model.save("quant_models/facileV2_4b_1_tidy.onnx")

Input tensor name: global_in
Output tensor name: global_out
input shape: [1, 14]
out shape: [1, 1]
input dtype: DataType.UINT4
out dtype: DataType.UINT4


In [11]:
#visualize net in netron
from finn.util.visualization import showInNetron
showInNetron("quant_models/facileV2_4b_1_tidy.onnx")

Serving 'quant_models/facileV2_4b_1_tidy.onnx' at http://0.0.0.0:8081


In [16]:
#test inference
from finn.core.onnx_exec import execute_onnx as exe_onnx
from utils import load_torch_datasets
import numpy as np

train, test, valid, shape = load_torch_datasets()
valid_size = len(valid)
batch_size = 1
num_batches = int(valid_size/batch_size)
running_error_square = 0
for i in range(0, num_batches):
    batch = valid[(i*batch_size):((i+1)*batch_size)]
    inp = batch[0].numpy()
    inp_dict = {in_tensor : inp}
    exp_out = batch[1].numpy()
    out_dict = exe_onnx(model, inp_dict)
    out = out_dict[out_tensor]
    print(out)

X_train shape: (229538, 14)
X_val shape: (12752, 14)
X_test shape: (12752, 14)
Y_train shape: (229538, 1)
Y_val shape: (12752, 1)
Y_test shape: (12752, 1)
Using saved split data


  "FINN datatype.".format(tensor, dtype)


Exception: Rounding error is too high to match set FINN
            datatype (DataType.UINT4) for input global_in