To get the ONNX file, run: 'python setup.py test --addopts "-k test_brevitas_quartznet"'

In [3]:
import numpy as np
from finn.util.visualization import showInNetron
from finn.core.modelwrapper import ModelWrapper

# Location of the ONNX file produced by the export test mentioned above.
file_name = "/tmp/quartznet.onnx"

# Open the exported graph in Netron for a first visual inspection.
showInNetron(file_name)

Serving '/tmp/quartznet.onnx' at http://0.0.0.0:8081


# Tidy-up & change 3D to 4D

In [4]:
from finn.transformation.change_3d_tensors_to_4d import Change3DTo4DTensors
from finn.transformation.general import GiveUniqueNodeNames, GiveRandomTensorNames, GiveReadableTensorNames, GiveUniqueParameterTensors

# Load the exported network, then apply the naming/tidy-up passes followed by
# the 3D -> 4D tensor conversion. The order of the tuple is the order in which
# the passes are applied.
model = ModelWrapper("/tmp/quartznet.onnx")
for tidy_up_pass in (
    GiveUniqueNodeNames,
    GiveRandomTensorNames,
    GiveReadableTensorNames,
    GiveUniqueParameterTensors,
    Change3DTo4DTensors,  # convert to supported format
):
    model = model.transform(tidy_up_pass())

# Checkpoint consumed by the following cells.
model.save("/tmp/quartznet_4d.onnx")

In [142]:
from finn.util.visualization import showInNetron

# Inspect the graph after the tidy-up and 3D -> 4D conversion.
netron_4d_path = "/tmp/quartznet_4d.onnx"
showInNetron(netron_4d_path)

Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_4d.onnx' at http://0.0.0.0:8081


# Remove LogSoftmax -> ArgMax and insert TopK

In [5]:
### Replace (LogSoftMax)->ArgMax by TopK
from finn.util.basic import get_by_name
from finn.transformation.insert_topk import InsertTopK

# Replace the trailing (LogSoftmax ->) ArgMax of the graph by a TopK node while
# keeping the original output tensor name and ONNX element type.
model = ModelWrapper("/tmp/quartznet_4d.onnx")

graph_out = model.graph.output[0]
graph_out_name = graph_out.name
orig_onnx_dtype = graph_out.type.tensor_type.elem_type

last_node = model.find_producer(graph_out_name)
if last_node is None or last_node.op_type != "ArgMax":
    # Fail early with a clear message instead of a NameError further down.
    raise RuntimeError(
        "Expected the graph output to be produced by an ArgMax node, found: {}".format(
            None if last_node is None else last_node.op_type
        )
    )

# Tensor that becomes the new graph output once ArgMax (and LogSoftmax) are gone.
new_out_name = last_node.input[0]

# Fall back to the ONNX default (axis=0) when the attribute is not stored on the node.
axis_attr = get_by_name(last_node.attribute, "axis", "name")
axis = axis_attr.i if axis_attr is not None else 0

# Remove ArgMax and, if present, the LogSoftmax feeding it.
second_to_last_node = model.find_producer(new_out_name)
if second_to_last_node is not None and second_to_last_node.op_type == "LogSoftmax":
    new_out_name = second_to_last_node.input[0]
    model.graph.node.remove(second_to_last_node)
model.graph.node.remove(last_node)

# Change output node: promote the remaining tensor from value_info to graph output.
new_out_vi = model.get_tensor_valueinfo(new_out_name)
if new_out_vi is None:
    raise RuntimeError("No value info found for tensor '{}'".format(new_out_name))
model.graph.output.insert(0, new_out_vi)
model.graph.output.pop(1)
model.graph.value_info.remove(new_out_vi)

model = model.transform(InsertTopK(k=1, axis=axis))

## Set output (ONNX) dtype to original output dtype
model.graph.output[0].type.tensor_type.elem_type = orig_onnx_dtype

## Rename output tensor to original output tensor name
model.rename_tensor(model.graph.output[0].name, graph_out_name)

model.save("/tmp/quartznet_4d_topk.onnx")

In [4]:
from finn.util.visualization import showInNetron

# Inspect the graph after ArgMax was replaced by TopK.
netron_topk_path = "/tmp/quartznet_4d_topk.onnx"
showInNetron(netron_topk_path)

Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_4d_topk.onnx' at http://0.0.0.0:8081


# Streamline

In [None]:
### STREAMLINING
from finn.util.visualization import showInNetron
from finn.core.modelwrapper import ModelWrapper

from finn.transformation.streamline import *
from finn.transformation.streamline.reorder import MoveMulPastDWConv, MoveLinearPastEltwiseAdd, MoveMulPastFork
from finn.transformation.streamline.absorb import AbsorbConsecutiveTransposes # No effect (only on consecutive transpose nodes)
from finn.transformation.streamline.absorb import AbsorbTransposeIntoMultiThreshold
from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
from finn.util.basic import get_by_name
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
from finn.util.visualization import showInNetron

# InferDataTypes is used below; import it explicitly so this cell does not
# depend on the name leaking in from the wildcard import or from another cell.
from finn.transformation.infer_datatypes import InferDataTypes

model = ModelWrapper("/tmp/quartznet_4d_topk.onnx")

# Collapse BatchNorm to Add and Mul
model = model.transform(BatchNormToAffine())

# Group additions
model = model.transform(MoveAddPastMul())
model = model.transform(MoveAddPastConv())
model = model.transform(MoveAddPastMul())

# Group multiplications
#### Move mul past fork
model = model.transform(MoveMulPastFork())
model = model.transform(MoveScalarMulPastConv())
model = model.transform(MoveMulPastDWConv())

# Move Mul/Add past join node
model = model.transform(MoveLinearPastEltwiseAdd())

# Collapse additions & multiplications
model = model.transform(CollapseRepeatedAdd())
model = model.transform(CollapseRepeatedMul())

# Absorb Add/Mul into multithreshold
model = model.transform(AbsorbAddIntoMultiThreshold())
model = model.transform(FactorOutMulSignMagnitude())
model = model.transform(Absorb1BitMulIntoConv())
model = model.transform(AbsorbMulIntoMultiThreshold())

# Ensure thresholds are integers
## Add quantization annotation to ensure RoundAndClipThresholds works.
## (An earlier attempt annotated the input of every MultiThreshold node as
## INT32 instead; annotating the global input and re-running datatype
## inference replaced it.)
global_input_name = model.graph.input[0].name
model.set_tensor_datatype(global_input_name, DataType.INT8)

model = model.transform(InferDataTypes())

model = model.transform(RoundAndClipThresholds())

model.save("/tmp/quartznet_streamlined.onnx")


In [2]:
from finn.util.visualization import showInNetron
# Inspect the streamlined graph (the streamlining cell must have been run first,
# otherwise this file does not exist).
netron_streamlined_path = "/tmp/quartznet_streamlined.onnx"
showInNetron(netron_streamlined_path)

FileNotFoundError: [Errno 2] No such file or directory: '/tmp/quartznet_streamlined.onnx'

In [6]:
# Some thresholds still carry a floating-point datatype annotation after
# streamlining (the InferDataTypes transform may need modifying). Print the
# name of every MultiThreshold node whose threshold tensor is FLOAT32.

model = ModelWrapper("/tmp/quartznet_streamlined.onnx")

for node in model.graph.node:
    if node.op_type != "MultiThreshold":
        continue
    # input[1] is the threshold tensor of the node.
    threshold_dtype = model.get_tensor_datatype(node.input[1])
    if threshold_dtype == DataType.FLOAT32:
        print(node.name)

# Remove floating point scalar multiplication at output


In [3]:
from finn.core.modelwrapper import ModelWrapper
# Remove the last Mul node of the streamlined graph and connect its input
# directly to the node that consumed its output.
model = ModelWrapper("/tmp/quartznet_streamlined.onnx")

mul_nodes = [x for x in model.graph.node if x.op_type == "Mul"]
if not mul_nodes:
    raise RuntimeError("No Mul node found in the streamlined graph")

# Last Mul in graph node order.
final_mul_node = mul_nodes[-1]

# NOTE(review): assumes input[0] of the Mul is the activation tensor and the
# scalar factor sits in input[1] - confirm for this graph.
input_mul = final_mul_node.input[0]
mul_output = final_mul_node.output[0]

node_after_mul = model.find_consumer(mul_output)
if node_after_mul is None:
    raise RuntimeError(
        "Final Mul node '{}' has no consumer to reconnect".format(final_mul_node.name)
    )

# Rewire whichever input slot actually consumed the Mul output; it is not
# necessarily slot 0.
for slot, tensor_name in enumerate(node_after_mul.input):
    if tensor_name == mul_output:
        node_after_mul.input[slot] = input_mul

model.graph.node.remove(final_mul_node)

model.save("/tmp/test_quartznet_4d_mulremoved.onnx")
    

In [1]:
from finn.util.visualization import showInNetron
# Inspect the graph after the final Mul node was removed.
netron_mulremoved_path = "/tmp/test_quartznet_4d_mulremoved.onnx"
showInNetron(netron_mulremoved_path)

Serving '/tmp/test_quartznet_4d_mulremoved.onnx' at http://0.0.0.0:8081


# Partitioning

In [9]:
## PARTITIONING
from finn.util.visualization import showInNetron
from finn.core.modelwrapper import ModelWrapper

from finn.transformation.create_generic_partitions import PartitionFromDict

model = ModelWrapper("/tmp/test_quartznet_4d_mulremoved.onnx")

# Partition id -> range of node indices.
#   0     : first 3 nodes
#   1..5  : five consecutive groups of 72 nodes, starting at node 3
#   6     : remaining nodes 363..373
# An earlier, finer split used 24 nodes per partition (ids 1..15) with a final
# partition 16 covering range(363, 376).
partitionings = {0: range(0, 3)}
for part_id in range(1, 6):
    first_node = 3 + 72 * (part_id - 1)
    partitionings[part_id] = range(first_node, first_node + 72)
partitionings[6] = range(363, 374)

model = model.transform(PartitionFromDict(partitionings))

partitioned_path = "/tmp/quartznet_streamlined_partitioned.onnx"
model.save(partitioned_path)
showInNetron(partitioned_path)

Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_streamlined_partitioned.onnx' at http://0.0.0.0:8081


# Lowering and absorbing transpose into multithreshold

In [29]:
## LOWERING and ABSORB_TRANSPOSE_INTO_MULTITHRESHOLD
from finn.util.visualization import showInNetron
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.streamline.absorb import AbsorbTransposeIntoMultiThreshold
from finn.transformation.streamline.reorder import MoveTransposePastMultiThreshold, MoveTransposePastJoinAdd, MoveTransposeBeforeFork
from finn.util.basic import get_by_name


# Lower the convolutions inside every partition and absorb/reorder the
# Transpose nodes this introduces. Each partition model is rewritten in place
# at the path stored in its parent node.
model = ModelWrapper("/tmp/quartznet_streamlined_partitioned.onnx")

for n in model.graph.node:
    # Only partition nodes carry a "model" attribute; skip anything else so
    # the attribute lookup below cannot dereference None.
    if n.op_type != "GenericPartition":
        continue

    path_to_partition = get_by_name(n.attribute, "model", "name").s.decode('utf-8')
    print(path_to_partition)
    model_partition = ModelWrapper(path_to_partition)

    # Lower
    model_partition = model_partition.transform(LowerConvsToMatMul())
    # Absorb transpose nodes
    model_partition = model_partition.transform(AbsorbTransposeIntoMultiThreshold())
    # Reorder remaining transpose nodes
    model_partition = model_partition.transform(MoveTransposePastMultiThreshold())
    model_partition = model_partition.transform(MoveTransposePastJoinAdd())
    model_partition = model_partition.transform(MoveTransposeBeforeFork())

    model_partition.save(path_to_partition)


model.save("/tmp/quartznet_streamlined_lowered.onnx")
showInNetron("/tmp/quartznet_streamlined_lowered.onnx")

/tmp/finn_dev_mirza/partitioning_e1w2zc4k/partition_0.onnx
/tmp/finn_dev_mirza/partitioning_e1w2zc4k/partition_1.onnx
/tmp/finn_dev_mirza/partitioning_e1w2zc4k/partition_2.onnx
/tmp/finn_dev_mirza/partitioning_e1w2zc4k/partition_3.onnx
/tmp/finn_dev_mirza/partitioning_e1w2zc4k/partition_4.onnx
/tmp/finn_dev_mirza/partitioning_e1w2zc4k/partition_5.onnx
/tmp/finn_dev_mirza/partitioning_e1w2zc4k/partition_6.onnx
Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_streamlined_lowered.onnx' at http://0.0.0.0:8081


In [11]:
# Show the lowered parent graph, then open the model file referenced by its
# first node (partition 0).
lowered_path = "/tmp/quartznet_streamlined_lowered.onnx"
model = ModelWrapper(lowered_path)
showInNetron(lowered_path)

partition_node = model.graph.node[0]
model_attr = get_by_name(partition_node.attribute, "model", "name")
path = model_attr.s.decode("utf-8")

showInNetron(path)


Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_streamlined_lowered.onnx' at http://0.0.0.0:8081
Stopping http://0.0.0.0:8081
Serving '/tmp/finn_dev_mirza/partitioning_e1w2zc4k/partition_0.onnx' at http://0.0.0.0:8081


In [51]:
# Show the lowered parent graph, then open the model file referenced by its
# second node (partition 1).
lowered_path = "/tmp/quartznet_streamlined_lowered.onnx"
model = ModelWrapper(lowered_path)
showInNetron(lowered_path)

partition_node = model.graph.node[1]
model_attr = get_by_name(partition_node.attribute, "model", "name")
path = model_attr.s.decode("utf-8")

showInNetron(path)


Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_streamlined_lowered.onnx' at http://0.0.0.0:8081
Stopping http://0.0.0.0:8081
Serving '/tmp/finn_dev_mirza/partitioning_e1w2zc4k/partition_1.onnx' at http://0.0.0.0:8081


# Unfolding and absorbing transpose into multithreshold (again)

## 1) Partition graph according to the 5 residual blocks

In [2]:
## UNFOLD and ABSORB TRANSPOSE again
from finn.util.visualization import showInNetron
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.extend_partition import ExtendPartition
from finn.transformation.streamline.absorb import AbsorbTransposeIntoMultiThreshold
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.create_generic_partitions import PartitionFromDict

model = ModelWrapper("/tmp/quartznet_streamlined_lowered.onnx")

# One partitioning dict per unfolding step: after step i the freshly unfolded
# nodes are wrapped again into partition i+1.
new_partitionings = [
    {1: range(4, 92)},
    {2: range(5, 92)},
    {3: range(6, 93)},
    {4: range(7, 94)},
    {5: range(8, 95)},
]
partition_dir = "/tmp/finn_dev_mirza/partitioning_final"

nodes = list(model.graph.node)
for ind, _ in enumerate(nodes):
    if ind == 0:
        # unfold current and next node
        node_ind_to_unfold = [ind, ind + 1]
    else:
        # ind+1 is the Transpose node (+4 for initial nodes)
        node_ind_to_unfold = [ind + 5]

    model = model.transform(ExtendPartition(node_ind_to_unfold))
    model = model.transform(AbsorbTransposeIntoMultiThreshold())

    # The sixth step (ind == 5) is unfolded but not re-partitioned; stop there.
    if ind == 5:
        break
    model = model.transform(PartitionFromDict(new_partitionings[ind], partition_dir))

model.save("/tmp/quartznet_absorbed_transpose.onnx")


## 2) Partition graph according to 15 residual blocks

In [69]:
## UNFOLD and ABSORB TRANSPOSE again
from finn.util.visualization import showInNetron
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.extend_partition import ExtendPartition
from finn.transformation.streamline.absorb import AbsorbTransposeIntoMultiThreshold
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.create_generic_partitions import PartitionFromDict

model = ModelWrapper("/tmp/quartznet_streamlined_lowered.onnx")

# One partitioning dict per unfolding step; each step wraps the freshly
# unfolded nodes into three new partitions.
new_partitionings = [
    {1: range(4, 34), 2: range(34, 63), 3: range(63, 92)},
    {4: range(7, 36), 5: range(36, 65), 6: range(65, 94)},
    {7: range(10, 39), 8: range(39, 68), 9: range(68, 97)},
    {10: range(13, 42), 11: range(42, 71), 12: range(71, 100)},
    {13: range(16, 45), 14: range(45, 74), 15: range(74, 103)},
]
partition_dir = "/tmp/finn_dev_mirza/partitioning_final"

nodes = list(model.graph.node)
for ind, _ in enumerate(nodes):
    if ind == 0:
        # unfold current and next node
        node_ind_to_unfold = [ind, ind + 1]
    else:
        # (+4 for initial nodes, +3 partitions, +1 for Transpose node)
        node_ind_to_unfold = [3 * ind + 1 + 4]

    model = model.transform(ExtendPartition(node_ind_to_unfold))
    model = model.transform(AbsorbTransposeIntoMultiThreshold())

    # The sixth step (ind == 5) is unfolded but not re-partitioned; stop there.
    if ind == 5:
        break
    model = model.transform(PartitionFromDict(new_partitionings[ind], partition_dir))

model.save("/tmp/quartznet_absorbed_transpose.onnx")


In [68]:
from finn.util.visualization import showInNetron
showInNetron("/tmp/quartznet_absorbed_transpose.onnx")

Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_absorbed_transpose.onnx' at http://0.0.0.0:8081


# Convert to fpgadataflow nodes

## TODO: work in progress — resume here
### Access each partition, apply hls conversion transformations...

In [4]:
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.create_generic_partitions import PartitionFromDict

model = ModelWrapper("/tmp/quartznet_absorbed_transpose.onnx")

# Wrap the nodes that are still at the top level of the graph: the first four
# nodes become partition 0, nodes 9..17 become partition 6.
partitionings = {
    0: range(0, 4),
    6: range(9, 18),
}
pre_hls_partitioner = PartitionFromDict(
    partitionings, "/tmp/finn_dev_mirza/partitioning_final"
)
model = model.transform(pre_hls_partitioner)

model.save("/tmp/quartznet_streamlined_partitioned_pre_hls.onnx")

In [6]:
from finn.util.visualization import showInNetron
# Inspect the fully partitioned graph before the HLS conversion.
netron_pre_hls_path = "/tmp/quartznet_streamlined_partitioned_pre_hls.onnx"
showInNetron(netron_pre_hls_path)

Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_streamlined_partitioned_pre_hls.onnx' at http://0.0.0.0:8081


In [None]:
from finn.util.basic import get_by_name
from finn.core.modelwrapper import ModelWrapper
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls

model = ModelWrapper("/tmp/quartznet_streamlined_partitioned_pre_hls.onnx")

# Conversion passes for the top-level graph, applied in this order.
# NOTE(review): make_deepcopy=False presumably makes the pass operate on the
# model itself rather than on a copy - confirm against ModelWrapper.transform.
for top_level_pass in (
    to_hls.InferConvInpGen,
    to_hls.InferVVAU,
    to_hls.InferQuantizedStreamingFCLayer,
):
    model = model.transform(top_level_pass(), make_deepcopy=False)

# Conversion passes for the model stored inside each partition, applied in
# this order.
partition_passes = (
    to_hls.InferConvInpGen,
    to_hls.InferVVAU,
    to_hls.InferQuantizedStreamingFCLayer,
    to_hls.InferThresholdingLayer,
    to_hls.InferAddStreamsLayer,
    to_hls.InferDuplicateStreamsLayer,
)

for n in model.graph.node:
    if n.op_type != "GenericPartition":
        continue

    path_to_partition = get_by_name(n.attribute, "model", "name").s.decode('utf-8')
    print(path_to_partition)
    model_partition = ModelWrapper(path_to_partition)

    for partition_pass in partition_passes:
        model_partition = model_partition.transform(partition_pass(), make_deepcopy=False)

    # Overwrite the partition file with its converted version.
    model_partition.save(path_to_partition)


model.save("/tmp/quartznet_hls_converted.onnx")


/tmp/finn_dev_mirza/partitioning_final/partition_0.onnx


  "Setting 0-valued first threshold to 1 to avoid vivado_hls bug"


/tmp/finn_dev_mirza/partitioning_final/partition_1.onnx




/tmp/finn_dev_mirza/partitioning_final/partition_2.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_3.onnx


In [None]:
# Load the converted model of partition 3 for closer inspection.
partition_3_path = "/tmp/finn_dev_mirza/partitioning_final/partition_3.onnx"
model = ModelWrapper(partition_3_path)

In [None]:
from finn.util.visualization import showInNetron
# Inspect the top-level graph after the HLS conversion.
netron_hls_path = "/tmp/quartznet_hls_converted.onnx"
showInNetron(netron_hls_path)

In [4]:
# Inspect the model stored for partition 1.
netron_partition_1_path = "/tmp/finn_dev_mirza/partitioning_final/partition_1.onnx"
showInNetron(netron_partition_1_path)

Stopping http://0.0.0.0:8081
Serving '/tmp/finn_dev_mirza/partitioning_final/partition_1.onnx' at http://0.0.0.0:8081


# Comparison of graph before and after HLS conversion

In [21]:
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode

# Prepare the HLS version of partition 1 for simulation.
# exec_mode selects the simulation flow: "cppsim" or "rtlsim".
exec_mode = "cppsim"

model_onnx = ModelWrapper("/tmp/finn_dev_mirza/partitioning_final/partition_1.onnx")
# NOTE(review): no cell in the visible part of this notebook writes
# "/tmp/test_partition1_hls.onnx" - confirm where this file comes from.
model_hls = ModelWrapper("/tmp/test_partition1_hls.onnx")

if exec_mode == "cppsim":
    model_hls = model_hls.transform(PrepareCppSim())
    model_hls = model_hls.transform(CompileCppSim())
    model_hls = model_hls.transform(SetExecMode("cppsim"))
elif exec_mode == "rtlsim":
    # These transformations are only needed for rtlsim and are not imported at
    # the top of this cell; import them here so the branch does not fail with
    # a NameError.
    from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
    from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
    from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
    from finn.transformation.general import GiveUniqueNodeNames

    model_hls = model_hls.transform(SetExecMode("rtlsim"))
    model_hls = model_hls.transform(GiveUniqueNodeNames())
    model_hls = model_hls.transform(PrepareIP("xcu250-figd2104-2L-e", 5))
    model_hls = model_hls.transform(HLSSynthIP())
    model_hls = model_hls.transform(PrepareRTLSim())
else:
    # Previously an unknown mode was silently ignored and the model was left
    # unprepared.
    raise ValueError("Unsupported exec_mode: {!r}".format(exec_mode))

In [24]:
# The preparation step took ~5 hours; save the prepared model to a checkpoint.
cppsim_model_path = "/tmp/test_partition1_cppsim.onnx"
model_hls.save(cppsim_model_path)

showInNetron(cppsim_model_path)

Stopping http://0.0.0.0:8081
Serving '/tmp/test_partition1_cppsim.onnx' at http://0.0.0.0:8081


In [26]:
from finn.core.datatype import DataType
from finn.util.basic import gen_finn_dt_tensor
import finn.core.onnx_exec as oxe

# Reference partition and its cppsim-prepared HLS counterpart.
model_onnx = ModelWrapper("/tmp/finn_dev_mirza/partitioning_final/partition_1.onnx")
model_hls = ModelWrapper("/tmp/test_partition1_cppsim.onnx")

# Generate one INT8 tensor shaped like the reference partition's input.
inp_dtype = DataType.INT8
onnx_inp_name = model_onnx.graph.input[0].name
inp_shape = model_onnx.get_tensor_shape(onnx_inp_name)
x = gen_finn_dt_tensor(inp_dtype, inp_shape)
inp_dict = {onnx_inp_name: x}

# Both graphs must expose the same input tensor name and shape, otherwise the
# shared input dictionary cannot be fed to both.
hls_inp_name = model_hls.graph.input[0].name
assert onnx_inp_name == hls_inp_name
assert model_onnx.get_tensor_shape(onnx_inp_name) == model_hls.get_tensor_shape(hls_inp_name)

# Last expression of the cell: its result is displayed as the cell output.
oxe.compare_execution(model_onnx, model_hls, inp_dict)




  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)


True

# Compare 2 models

1. Original QuartzNet
2. Any other


In [7]:
import numpy as np
from finn.core.modelwrapper import ModelWrapper
from finn.util.basic import gen_finn_dt_tensor
import finn.core.onnx_exec as oxe

import time
t1 = time.perf_counter()

################################################################################################
####
#### MODEL 1: original QuartzNet export, used as the reference
####
model_1 = ModelWrapper("/tmp/quartznet.onnx")

# Create input data
input0_tensor_name = model_1.graph.input[0].name
input_shape = model_1.get_tensor_shape(input0_tensor_name)

# Seeded generator so the reference input (and therefore the comparison in the
# next cell) is reproducible across kernel restarts.
rng = np.random.RandomState(0)
# `high` is exclusive: 128 is needed to cover the full INT8 range [-128, 127]
# (high=127 never produced the value 127).
input_val = rng.randint(low=-128, high=128, size=input_shape).astype(np.float32)
input_dict = {input0_tensor_name: input_val}
output0_tensor_name = model_1.graph.output[0].name

expected_m1_dict = oxe.execute_onnx(model_1, input_dict, return_full_exec_context=False)
expected_m1 = expected_m1_dict[output0_tensor_name]
################################################################################################


t2 = time.perf_counter() - t1
print("Elapsed time: {}".format(t2))

Elapsed time: 358.9236013859918


In [8]:
import time
t1 = time.perf_counter()

################################################################################################
####
#### MODEL 2: transformed model checked against the reference output of MODEL 1
####
# Checkpoints already verified with this cell (swap the path to re-check one):
#   "/tmp/quartznet_streamlined.onnx"              CORRECT (new inferred datatypes)
#   "/tmp/quartznet_streamlined_partitioned.onnx"  CORRECT
#   "/tmp/quartznet_streamlined_lowered.onnx"      CORRECT
#   "/tmp/quartznet_temp_test.onnx"                CORRECT?
#   "/tmp/quartznet_4d_topk.onnx"                  CORRECT (new)
#   "/tmp/test_quartznet_mulremoved.onnx"          CORRECT (new)
model_2 = ModelWrapper("/tmp/quartznet_streamlined.onnx")

# Reuse the input generated for MODEL 1 (relies on the previous cell).
m1_input_val = input_val

# Append a trailing axis of size 1 to the MODEL 1 input before feeding MODEL 2.
input0_tensor_name = model_2.graph.input[0].name
m2_input_val = np.reshape(m1_input_val, np.shape(m1_input_val) + (1,))
input_dict = {input0_tensor_name: m2_input_val}
output0_tensor_name = model_2.graph.output[0].name

expected_m2_dict = oxe.execute_onnx(model_2, input_dict, return_full_exec_context=False)
expected_m2 = expected_m2_dict[output0_tensor_name]

# Bring output and input back to the MODEL 1 shapes before comparing.
expected_m2 = np.reshape(expected_m2, np.shape(expected_m1))
m2_input_val = np.reshape(m2_input_val, np.shape(m1_input_val))

assert (m1_input_val == m2_input_val).all()
assert (expected_m1 == expected_m2).all()
################################################################################################


t2 = time.perf_counter() - t1
print("Elapsed time: {}".format(t2))

  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype

  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype

  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype

Elapsed time: 962.2795071309956
