# Quantized Model Cleanup
This notebook takes the FINN-ONNX FACILE model exported by the `quant_train` notebook and tidies it up with FINN's graph transformations. It stops before the conversion to HLS layers.

### Load in FINN and transform

In [1]:
import onnx
from finn.util.test import get_test_model_trained
import brevitas.onnx as bo
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs
from finn.transformation.infer_datatypes import InferDataTypes

In [2]:
# Load the Brevitas export and run FINN's tidy-up passes over it.
# Each pass is instantiated and applied in turn to the result of the previous one.
model = ModelWrapper("quant_models/facileV3_6b_1.onnx")
for tidy_pass in (
    InferShapes,
    FoldConstants,
    GiveUniqueNodeNames,
    GiveReadableTensorNames,
    InferDataTypes,
    RemoveStaticGraphInputs,
):
    model = model.transform(tidy_pass())

In [3]:
# Report name, shape and datatype of the graph's first input and output
# tensors, annotate the input datatype, then save the tidied model.
from finn.core.datatype import DataType

in_tensor = model.graph.input[0].name
out_tensor = model.graph.output[0].name
for template, tensor_name in (
    ("Input tensor name: %s", in_tensor),
    ("Output tensor name: %s", out_tensor),
):
    print(template % tensor_name)

in_shape = model.get_tensor_shape(in_tensor)
out_shape = model.get_tensor_shape(out_tensor)
for prefix, shape in (("input shape: ", in_shape), ("out shape: ", out_shape)):
    print(prefix + str(shape))

# Only the input tensor is annotated (as UINT6); the output datatype is left
# exactly as the model already reports it.
model.set_tensor_datatype(in_tensor, DataType.UINT6)
in_dtype = model.get_tensor_datatype(in_tensor)
out_dtype = model.get_tensor_datatype(out_tensor)
for prefix, dtype in (("input dtype: ", in_dtype), ("out dtype: ", out_dtype)):
    print(prefix + str(dtype))

model.save("quant_models/facileV3_6b_1_tidy.onnx")

Input tensor name: global_in
Output tensor name: global_out
input shape: [1, 14]
out shape: [1, 1]
input dtype: DataType.UINT6
out dtype: DataType.FLOAT32


In [4]:
# Open the saved tidy model in Netron for visual inspection.
from finn.util.visualization import showInNetron

tidy_model_path = "quant_models/facileV3_6b_1_tidy.onnx"
showInNetron(tidy_model_path)

Serving 'quant_models/facileV3_6b_1_tidy.onnx' at http://0.0.0.0:8081


In [9]:
from utils import load_split_np_data
import proc_for_infer as pfi
from finn.core.onnx_exec import execute_onnx as exe_onnx

# Execute the transformed model in software (FINN's ONNX executor) over one
# data split, accumulating the squared error between the post-processed model
# outputs and the expected outputs, and print the resulting MSE.
datasets = load_split_np_data()

batch_size = 1
# NOTE(review): indices 1 and 4 are presumably the validation inputs/targets
# (X_val / Y_val) -- confirm against load_split_np_data's return order.
inps = datasets[1]
exp_out = datasets[4]
valid_size = len(exp_out)
# Floor division: a trailing partial batch (if any) is not evaluated, which is
# why the MSE below divides by num_batches * batch_size rather than valid_size.
num_batches = valid_size // batch_size
running_error_square = 0
# exp_act_out[0] collects expected values, exp_act_out[1] the model's
# post-processed values, in evaluation order.
exp_act_out = [[], []]
for i in range(num_batches):
    start, stop = i * batch_size, (i + 1) * batch_size
    batch = inps[start:stop]
    batch_exp_out = exp_out[start:stop]
    proc_batch = pfi.preproc(batch).astype("float32")
    inp_dict = {in_tensor: proc_batch}
    # Software stand-in for the accelerator call `accel.execute(proc_batch)`.
    out_dict = exe_onnx(model, inp_dict)
    # NOTE(review): the int8 cast presumably mirrors the integer output an
    # accelerator would hand to postproc -- confirm it is still wanted here.
    batch_out = out_dict[out_tensor].astype("int8")
    batch_proc_out = pfi.postproc(batch_out)
    batch_errs = batch_proc_out - batch_exp_out
    batch_sq_errs = batch_errs * batch_errs
    # Built-in sum() adds along the batch axis and keeps the trailing axis,
    # so the accumulator (and the printed MSE) stays array-shaped.
    running_error_square += sum(batch_sq_errs)
    # Record every sample of the batch, not just the first one, so the saved
    # comparison stays complete when batch_size > 1 (identical at batch_size 1).
    for exp_row, act_row in zip(batch_exp_out, batch_proc_out):
        exp_act_out[0].append(exp_row[0])
        exp_act_out[1].append(act_row[0])
print("MSE: " + str(running_error_square / (num_batches * batch_size)))

X_train shape: (229538, 14)
X_val shape: (12752, 14)
X_test shape: (12752, 14)
Y_train shape: (229538, 1)
Y_val shape: (12752, 1)
Y_test shape: (12752, 1)
Using saved split data
MSE: [147.02400386]


In [None]:
import os
import pickle

# Persist the expected-vs-actual value lists (exp_act_out) to disk.
save_loc = 'exp_act_comparisons/post_transform.pkl'
# Create the target directory first: open() in 'wb' mode does not create
# missing parent directories and would raise FileNotFoundError otherwise.
os.makedirs(os.path.dirname(save_loc), exist_ok=True)
with open(save_loc, 'wb') as out_file:
    pickle.dump(exp_act_out, out_file)