To get the ONNX file, run: 'python setup.py test --addopts "-k test_brevitas_quartznet"'

In [23]:
import numpy as np
from finn.util.visualization import showInNetron
from finn.core.modelwrapper import ModelWrapper

# Path of the exported QuartzNet ONNX model to inspect.
# The commented-out assignments are alternative export locations used earlier.
#file_name = '/tmp/quartznet.onnx'
#file_name = "/tmp/finn_dev_mirza/end2end_quartznet_export.onnx"
#file_name = 'models/end2end_quartznet_export.onnx'
file_name = "models/end2end_quartznet_export_dev.onnx"
# Serve the model in the Netron viewer for visual inspection.
showInNetron(file_name)

Stopping http://0.0.0.0:8081
Serving 'models/end2end_quartznet_export_dev.onnx' at http://0.0.0.0:8081


# Tidy-up & change 3D to 4D

In [24]:
from finn.transformation.change_3d_tensors_to_4d import Change3DTo4DTensors
from finn.transformation.general import GiveUniqueNodeNames, GiveRandomTensorNames, GiveReadableTensorNames, GiveUniqueParameterTensors

# Load the exported QuartzNet graph. Other export locations used during development:
#   /tmp/quartznet.onnx
#   /tmp/finn_dev_mirza/end2end_quartznet_export.onnx
#   models/end2end_quartznet_export.onnx
model = ModelWrapper("models/end2end_quartznet_export_dev.onnx")

# Naming clean-up passes first, then the 3D -> 4D tensor conversion
# (the "supported format"). Each pass is instantiated right before it is
# applied, in exactly this order.
tidy_up_passes = (
    GiveUniqueNodeNames,
    GiveRandomTensorNames,
    GiveReadableTensorNames,
    GiveUniqueParameterTensors,
    Change3DTo4DTensors,
)
for pass_cls in tidy_up_passes:
    model = model.transform(pass_cls())

model.save("/tmp/quartznet_4d.onnx")

In [25]:
from finn.util.visualization import showInNetron

# Inspect the tidied-up, 4D-converted model written by the previous cell.
showInNetron("/tmp/quartznet_4d.onnx")

Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_4d.onnx' at http://0.0.0.0:8081


# Remove LogSoftmax -> ArgMax and insert TopK

In [4]:
### Replace (LogSoftMax)->ArgMax by TopK
from finn.util.basic import get_by_name
from finn.transformation.insert_topk import InsertTopK

model = ModelWrapper("/tmp/quartznet_4d.onnx")

# Remember the original graph output so that its name and ONNX element type
# can be restored once the output node has been swapped for TopK.
graph_out = model.graph.output[0]
graph_out_name = graph_out.name
orig_onnx_dtype = graph_out.type.tensor_type.elem_type

# The rewrite only makes sense when the graph ends in ArgMax; fail with a
# clear message instead of a NameError further down.
last_node = model.find_producer(graph_out_name)
if last_node is None or last_node.op_type != "ArgMax":
    raise RuntimeError(
        "Expected the graph output to be produced by an ArgMax node, found: "
        + ("no producer" if last_node is None else last_node.op_type)
    )

argmax_in = last_node.input[0]
# Attributes may be omitted in the ONNX file; fall back to the ONNX ArgMax
# defaults (axis=0, keepdims=1) in that case.
axis_attr = get_by_name(last_node.attribute, "axis", "name")
keepdims_attr = get_by_name(last_node.attribute, "keepdims", "name")
axis = axis_attr.i if axis_attr is not None else 0
keepdims = keepdims_attr.i if keepdims_attr is not None else 1  # read for reference; not passed to InsertTopK

# Tensor that becomes the new graph output: the LogSoftmax input when a
# LogSoftmax precedes the ArgMax, otherwise the ArgMax input itself.
logsoftmax_in = argmax_in
second_to_last_node = model.find_producer(argmax_in)
if second_to_last_node is not None and second_to_last_node.op_type == "LogSoftmax":
    logsoftmax_in = second_to_last_node.input[0]
    model.graph.node.remove(second_to_last_node)
model.graph.node.remove(last_node)

# Change output node: promote the tensor's value info to graph output,
# drop the old output entry, and remove the now-duplicated value_info entry.
logsoftmax_in_vi = model.get_tensor_valueinfo(logsoftmax_in)
model.graph.output.insert(0, logsoftmax_in_vi)
model.graph.output.pop(1)
model.graph.value_info.remove(logsoftmax_in_vi)

model = model.transform(InsertTopK(k=1, axis=axis))

# Set output (ONNX) dtype to original output dtype
model.graph.output[0].type.tensor_type.elem_type = orig_onnx_dtype

# Rename output tensor to original output tensor name
model.rename_tensor(model.graph.output[0].name, graph_out_name)

model.save("/tmp/quartznet_4d_topk.onnx")

In [None]:
from finn.util.visualization import showInNetron

# Inspect the model in which the output nodes were replaced by TopK.
showInNetron("/tmp/quartznet_4d_topk.onnx")

# Streamline

In [44]:
### STREAMLINING
from finn.util.visualization import showInNetron
from finn.core.modelwrapper import ModelWrapper

from finn.transformation.streamline import *
from finn.transformation.streamline.reorder import MoveMulPastDWConv, MoveLinearPastEltwiseAdd, MoveMulPastFork
from finn.transformation.streamline.absorb import AbsorbConsecutiveTransposes # No effect (only on consecutive transpose nodes)
from finn.transformation.streamline.absorb import AbsorbTransposeIntoMultiThreshold
from finn.transformation.streamline.absorb import AbsorbSignBiasIntoMultiThreshold
from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
from finn.transformation.batchnorm_to_affine import BatchNormToAffine

from finn.transformation.batchnorm_to_affine import BatchNormToAffine
from finn.transformation.streamline.reorder import (
    MoveAddPastMul,
    MoveAddPastConv,
    MoveMulPastFork,
    MoveScalarMulPastConv,
    MoveMulPastDWConv,
    MoveLinearPastEltwiseAdd
)
from finn.transformation.streamline.collapse_repeated import(
    CollapseRepeatedAdd,
    CollapseRepeatedMul
)
from finn.transformation.streamline.absorb import(
    AbsorbAddIntoMultiThreshold,
    AbsorbMulIntoMultiThreshold,
    FactorOutMulSignMagnitude,
    Absorb1BitMulIntoConv
)
from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
from finn.transformation.infer_datatypes import InferDataTypes

from finn.util.basic import get_by_name
from finn.core.datatype import DataType

# Streamlining starts from the 4D model with the original output nodes;
# the TopK variant ("/tmp/quartznet_4d_topk.onnx") is the alternative input.
model = ModelWrapper("/tmp/quartznet_4d.onnx")

# Passes are instantiated and applied one after another, in this order.
# The "group additions" passes (MoveAddPastMul, MoveAddPastConv) are
# intentionally left out, as in the earlier revision of this cell.
streamline_passes = (
    # Absorb sign bias into multithreshold
    AbsorbSignBiasIntoMultiThreshold,
    # Collapse BatchNorm to Add and Mul
    BatchNormToAffine,
    # Group multiplications: move Mul past fork, scalar Mul past conv, Mul past depthwise conv
    MoveMulPastFork,
    MoveScalarMulPastConv,
    MoveMulPastDWConv,
    # Move Mul/Add past join node
    MoveLinearPastEltwiseAdd,
    # Collapse additions & multiplications
    CollapseRepeatedAdd,
    CollapseRepeatedMul,
    # Absorb Add/Mul into multithreshold
    AbsorbAddIntoMultiThreshold,
    FactorOutMulSignMagnitude,
    Absorb1BitMulIntoConv,
    AbsorbMulIntoMultiThreshold,
)
for pass_cls in streamline_passes:
    model = model.transform(pass_cls())

# Ensure thresholds are integers.
# RoundAndClipThresholds needs datatype annotations, so the global input is
# annotated as INT8 and the datatypes are inferred through the graph first.
# (An earlier attempt annotated every MultiThreshold input as INT32 instead.)
global_input_name = model.graph.input[0].name
model.set_tensor_datatype(global_input_name, DataType.INT8)
model = model.transform(InferDataTypes())
model = model.transform(RoundAndClipThresholds())

model.save("/tmp/quartznet_streamlined.onnx")


In [45]:
from finn.util.visualization import showInNetron
# Inspect the streamlined model saved by the previous cell.
showInNetron("/tmp/quartznet_streamlined.onnx")

Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_streamlined.onnx' at http://0.0.0.0:8081


In [47]:
#### Some thresholds still have floating point quantization. Modify InferDatatypes transform...

model = ModelWrapper("/tmp/quartznet_streamlined.onnx")

# Names of all MultiThreshold nodes whose threshold tensor (second input)
# is still annotated as FLOAT32, in graph order.
float_threshold_nodes = [
    node.name
    for node in model.graph.node
    if node.op_type == "MultiThreshold"
    and model.get_tensor_datatype(node.input[1]) == DataType.FLOAT32
]
for node_name in float_threshold_nodes:
    print(node_name)

# Remove floating point scalar multiplication at output


In [51]:
from finn.core.modelwrapper import ModelWrapper
model = ModelWrapper("/tmp/quartznet_streamlined.onnx")

# NOTE: this collects *every* Mul node left in the graph.
# NOTE(review): presumably only the floating point scalar Muls in front of
# the output nodes remain after streamlining - confirm in Netron.
mul_nodes = [x for x in model.graph.node if (x.op_type=="Mul")]

# Bypass each Mul: every consumer input that reads the Mul output is rewired
# to the Mul's first input, then the Mul node is dropped.
for n in mul_nodes:
    input_mul = n.input[0]
    mul_out = n.output[0]
    rewired = False
    for consumer in model.graph.node:
        for idx, tensor_name in enumerate(consumer.input):
            # Replace the matching input slot (not blindly input[0]) and
            # cover all consumers in case the Mul output forks.
            if tensor_name == mul_out:
                consumer.input[idx] = input_mul
                rewired = True
    if rewired:
        model.graph.node.remove(n)
    else:
        # No node reads this Mul (e.g. it drives a graph output): removing it
        # would leave the output dangling, so keep it and report.
        print("Skipping {}: its output has no consumer node".format(n.name))

model.save("/tmp/test_quartznet_4d_mulremoved.onnx")
    

In [52]:
from finn.util.visualization import showInNetron
# Inspect the model after the Mul nodes have been removed.
showInNetron("/tmp/test_quartznet_4d_mulremoved.onnx")

Stopping http://0.0.0.0:8081
Serving '/tmp/test_quartznet_4d_mulremoved.onnx' at http://0.0.0.0:8081


# Partitioning

In [54]:
## PARTITIONING
from finn.util.visualization import showInNetron
from finn.core.modelwrapper import ModelWrapper

from finn.transformation.create_generic_partitions import PartitionFromDict

model = ModelWrapper("/tmp/test_quartznet_4d_mulremoved.onnx")

# Node-index boundaries of consecutive partitions: partition i covers
# range(partition_bounds[i], partition_bounds[i + 1]).
# The final boundary is 375 for the LogSoftMax + ArgMax ending; use 374
# instead if the graph ends in TopK.
# A finer split that was tried earlier: 0-3, then 24-node steps from 3 up to
# 363, then 363-376 (17 partitions in total).
partition_bounds = [0, 3, 75, 147, 219, 291, 363, 375]
partitionings = {
    part_id: range(start, stop)
    for part_id, (start, stop) in enumerate(
        zip(partition_bounds[:-1], partition_bounds[1:])
    )
}

model = model.transform(PartitionFromDict(partitionings))

partitioned_model_path = "/tmp/quartznet_streamlined_partitioned.onnx"
model.save(partitioned_model_path)
showInNetron(partitioned_model_path)

Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_streamlined_partitioned.onnx' at http://0.0.0.0:8081


# Lowering and absorbing transpose into multithreshold

In [1]:
## LOWERING and ABSORB_TRANSPOSE_INTO_MULTITHRESHOLD
from finn.util.visualization import showInNetron
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.streamline.absorb import AbsorbTransposeIntoMultiThreshold
from finn.transformation.streamline.reorder import MoveTransposePastMultiThreshold, MoveTransposePastJoinAdd, MoveTransposeBeforeFork
from finn.util.basic import get_by_name


model = ModelWrapper("/tmp/quartznet_streamlined_partitioned.onnx")

# Each GenericPartition node stores the path of its sub-model in the "model"
# attribute; the sub-model is transformed and saved back to the same path.
for n in model.graph.node:
    # Only partition nodes carry a "model" attribute; skip anything else
    # instead of failing on a missing attribute.
    if n.op_type != "GenericPartition":
        continue
    path_to_partition = get_by_name(n.attribute, "model", "name").s.decode('utf-8')
    print(path_to_partition)
    model_partition = ModelWrapper(path_to_partition)

    # Lower
    model_partition = model_partition.transform(LowerConvsToMatMul())
    # Absorb transpose nodes
    model_partition = model_partition.transform(AbsorbTransposeIntoMultiThreshold())
    # Reorder remaining transpose nodes
    model_partition = model_partition.transform(MoveTransposePastMultiThreshold())
    model_partition = model_partition.transform(MoveTransposePastJoinAdd())
    model_partition = model_partition.transform(MoveTransposeBeforeFork())

    model_partition.save(path_to_partition)

# The top-level graph itself is unchanged; it is saved under a new name so
# later cells can pick up the lowered stage.
model.save("/tmp/quartznet_streamlined_lowered.onnx")
showInNetron("/tmp/quartznet_streamlined_lowered.onnx")

/tmp/finn_dev_mirza/partitioning_3ff53d_c/partition_0.onnx
/tmp/finn_dev_mirza/partitioning_3ff53d_c/partition_1.onnx
/tmp/finn_dev_mirza/partitioning_3ff53d_c/partition_2.onnx
/tmp/finn_dev_mirza/partitioning_3ff53d_c/partition_3.onnx
/tmp/finn_dev_mirza/partitioning_3ff53d_c/partition_4.onnx
/tmp/finn_dev_mirza/partitioning_3ff53d_c/partition_5.onnx
/tmp/finn_dev_mirza/partitioning_3ff53d_c/partition_6.onnx
Serving '/tmp/quartznet_streamlined_lowered.onnx' at http://0.0.0.0:8081


In [2]:
# Reload the lowered top-level graph and show it ...
lowered_model_path = "/tmp/quartznet_streamlined_lowered.onnx"
model = ModelWrapper(lowered_model_path)
showInNetron(lowered_model_path)

# ... then open the sub-model referenced by the first node's "model" attribute.
p = model.graph.node[0]
model_attr = get_by_name(p.attribute, "model", "name")
path = model_attr.s.decode("utf-8")

showInNetron(path)


Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_streamlined_lowered.onnx' at http://0.0.0.0:8081
Stopping http://0.0.0.0:8081
Serving '/tmp/finn_dev_mirza/partitioning_3ff53d_c/partition_0.onnx' at http://0.0.0.0:8081


In [3]:
# Reload the lowered top-level graph and show it ...
lowered_model_path = "/tmp/quartznet_streamlined_lowered.onnx"
model = ModelWrapper(lowered_model_path)
showInNetron(lowered_model_path)

# ... then open the sub-model referenced by the second node's "model" attribute.
p = model.graph.node[1]
model_attr = get_by_name(p.attribute, "model", "name")
path = model_attr.s.decode("utf-8")

showInNetron(path)


Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_streamlined_lowered.onnx' at http://0.0.0.0:8081
Stopping http://0.0.0.0:8081
Serving '/tmp/finn_dev_mirza/partitioning_3ff53d_c/partition_1.onnx' at http://0.0.0.0:8081


# Unfolding and absorbing transpose into multithreshold (again)

## 1) Partition graph according to the 5 residual blocks

In [2]:
## UNFOLD and ABSORB TRANSPOSE again
from finn.util.visualization import showInNetron
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.extend_partition import ExtendPartition
from finn.transformation.streamline.absorb import AbsorbTransposeIntoMultiThreshold
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.create_generic_partitions import PartitionFromDict

model = ModelWrapper("/tmp/quartznet_streamlined_lowered.onnx")

# Directory handed to PartitionFromDict for every re-partitioning step.
final_partition_dir = "/tmp/finn_dev_mirza/partitioning_final"

# One partitioning dict per unfolding step, indexed by the step number.
new_partitionings = [
    {1: range(4, 92)},
    {2: range(5, 92)},
    {3: range(6, 93)},
    {4: range(7, 94)},
    {5: range(8, 95)},
]

nodes = list(model.graph.node)
for ind, n in enumerate(nodes):
    if ind == 0:
        # unfold current and next node
        node_ind_to_unfold = [ind, ind + 1]
    else:
        # ind+1 is the Transpose node (+4 for initial nodes)
        node_ind_to_unfold = [ind + 5]

    model = model.transform(ExtendPartition(node_ind_to_unfold))
    model = model.transform(AbsorbTransposeIntoMultiThreshold())

    # The sixth step is only unfolded, not re-partitioned, and ends the loop.
    if ind == 5:
        break
    model = model.transform(PartitionFromDict(new_partitionings[ind], final_partition_dir))

model.save("/tmp/quartznet_absorbed_transpose.onnx")


## 2) Partition graph according to 15 residual blocks

In [4]:
## UNFOLD and ABSORB TRANSPOSE again
from finn.util.visualization import showInNetron
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.extend_partition import ExtendPartition
from finn.transformation.streamline.absorb import AbsorbTransposeIntoMultiThreshold
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.create_generic_partitions import PartitionFromDict

model = ModelWrapper("/tmp/quartznet_streamlined_lowered.onnx")

# Directory handed to PartitionFromDict for every re-partitioning step.
final_partition_dir = "/tmp/finn_dev_mirza/partitioning_final"

# One partitioning dict per unfolding step, indexed by the step number.
new_partitionings = [
    {0: range(0, 4), 1: range(4, 34), 2: range(34, 63), 3: range(63, 92)},
    {4: range(4, 33), 5: range(33, 62), 6: range(62, 91)},
    {7: range(7, 36), 8: range(36, 65), 9: range(65, 94)},
    {10: range(10, 39), 11: range(39, 68), 12: range(68, 97)},
    {13: range(13, 42), 14: range(42, 71), 15: range(71, 100)},
    {16: range(16, 25)},
]

nodes = list(model.graph.node)
for ind, n in enumerate(nodes):
    if ind == 0:
        # unfold current and next node
        node_ind_to_unfold = [ind, ind + 1]
    else:
        # (+1 for initial nodes, +3 partitions, +1 for Transpose node)
        node_ind_to_unfold = [3 * ind + 2]

    model = model.transform(ExtendPartition(node_ind_to_unfold))
    model = model.transform(AbsorbTransposeIntoMultiThreshold())

    model = model.transform(PartitionFromDict(new_partitionings[ind], final_partition_dir))
    # The sixth step uses the last partitioning entry; stop after it.
    if ind == 5:
        break

model.save("/tmp/quartznet_absorbed_transpose.onnx")


In [5]:
from finn.util.visualization import showInNetron
showInNetron("/tmp/quartznet_absorbed_transpose.onnx")

Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_absorbed_transpose.onnx' at http://0.0.0.0:8081


# Convert to fpgadataflow nodes

### Access each partition, apply hls conversion transformations...

In [10]:
from finn.util.basic import get_by_name
from finn.core.modelwrapper import ModelWrapper
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls

mem_mode = "decoupled"

model = ModelWrapper("/tmp/quartznet_absorbed_transpose.onnx")

# Zero-argument factories for the HLS conversion passes, in application
# order. Each pass object is created right before it is applied.
hls_conversions = [
    to_hls.InferConvInpGen,
    to_hls.InferVVAU,
    lambda: to_hls.InferQuantizedStreamingFCLayer(mem_mode),
    to_hls.InferThresholdingLayer,
    to_hls.InferAddStreamsLayer,
    to_hls.InferDuplicateStreamsLayer,
]

for n in model.graph.node:
    if n.op_type != "GenericPartition":
        continue
    # The partition's sub-model lives in the file named by its "model" attribute.
    path_to_partition = get_by_name(n.attribute, "model", "name").s.decode('utf-8')
    print(path_to_partition)
    model_partition = ModelWrapper(path_to_partition)

    for make_pass in hls_conversions:
        model_partition = model_partition.transform(make_pass(), make_deepcopy=False)

    # Overwrite the partition file in place with the converted sub-model.
    model_partition.save(path_to_partition)

model.save("/tmp/quartznet_hls_converted.onnx")


/tmp/finn_dev_mirza/partitioning_final/partition_0.onnx


  "Setting 0-valued first threshold to 1 to avoid vivado_hls bug"


/tmp/finn_dev_mirza/partitioning_final/partition_1.onnx




/tmp/finn_dev_mirza/partitioning_final/partition_2.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_3.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_4.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_5.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_6.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_7.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_8.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_9.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_10.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_11.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_12.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_13.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_14.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_15.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_16.onnx


# Folding

In [11]:
from finn.core.modelwrapper import ModelWrapper
from finn.util.basic import get_by_name
from finn.custom_op.registry import getCustomOp

model = ModelWrapper("/tmp/quartznet_hls_converted.onnx")

# For these layer types the folding attribute is set to a quarter of the
# layer's channel count:
#   op_type -> (attribute holding the channel count, folding attribute to set)
channel_folding = {
    "Vector_Vector_Activate_Batch": ("Channels", "PE"),
    "Thresholding_Batch": ("NumChannels", "PE"),
    "AddStreams_Batch": ("NumChannels", "PE"),
    "DuplicateStreams_Batch": ("NumChannels", "PE"),
    "FMPadding_Batch": ("NumChannels", "SIMD"),
    "ConvolutionInputGenerator1D": ("IFMChannels", "SIMD"),
}

for n_par in model.graph.node:
    if n_par.op_type != "GenericPartition":
        continue
    path_to_partition = get_by_name(n_par.attribute, "model", "name").s.decode('utf-8')
    print(path_to_partition)
    model_partition = ModelWrapper(path_to_partition)

    for n in model_partition.graph.node:
        if n.op_type == "StreamingFCLayer_Batch":
            inst = getCustomOp(n)
            mh = get_by_name(n.attribute, "MH", "name").i
            mw = get_by_name(n.attribute, "MW", "name").i
            if mh == 29:
                # MH == 29 identifies the final node (TODO: make generic...)
                assert mw % 4 == 0
                mh = 1
            else:
                assert mh % 4 == 0 and mw % 4 == 0
                mh = int(mh / 4)
            mw = int(mw / 4)
            # NOTE(review): for this one node PE is derived from MW rather
            # than MH - confirm this is deliberate.
            pe = mw if n.name == "pt_1_StreamingFCLayer_Batch_0" else mh
            inst.set_nodeattr("PE", pe)  # mh % PE == 0
            inst.set_nodeattr("SIMD", mw)  # mw % SIMD == 0
        elif n.op_type in channel_folding:
            channel_attr, folding_attr = channel_folding[n.op_type]
            inst = getCustomOp(n)
            ifc = get_by_name(n.attribute, channel_attr, "name").i
            assert ifc % 4 == 0
            ifc = int(ifc / 4)
            inst.set_nodeattr(folding_attr, ifc)  # channels % folding == 0

    # Write the folded sub-model back to its partition file.
    model_partition.save(path_to_partition)

model.save("/tmp/quartznet_hls_converted.onnx")
        

/tmp/finn_dev_mirza/partitioning_final/partition_0.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_1.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_2.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_3.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_4.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_5.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_6.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_7.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_8.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_9.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_10.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_11.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_12.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_13.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_14.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_15.onnx
/tmp/finn_dev_mirza/partitioning_final/partition_16.onnx


In [15]:
from finn.util.visualization import showInNetron
# Inspect the last partition (partition_16) after folding.
showInNetron("/tmp/finn_dev_mirza/partitioning_final/partition_16.onnx")

Stopping http://0.0.0.0:8081
Serving '/tmp/finn_dev_mirza/partitioning_final/partition_16.onnx' at http://0.0.0.0:8081


## Save to notebook

In [51]:
from finn.util.basic import get_by_name

model = ModelWrapper("/tmp/quartznet_hls_converted.onnx")

# Copy every partition sub-model into the notebook's "models" directory and
# point the partition node's "model" attribute at the copy. Partitions are
# numbered in graph order, counting GenericPartition nodes only.
partition_idx = 0
for n in model.graph.node:
    if n.op_type != "GenericPartition":
        continue
    p_model_path = get_by_name(n.attribute, "model", "name").s.decode('utf-8')
    p_model = ModelWrapper(p_model_path)

    new_path = "models/end2end_quartznet_hls_layers_partition_{}.onnx".format(partition_idx)
    p_model.save(new_path)

    getCustomOp(n).set_nodeattr("model", new_path)

    partition_idx += 1

model.save("models/end2end_quartznet_hls_layers.onnx")


# Comparison of graph before and after HLS conversion

## CppSim

In [58]:
import time

from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode

# Alternative input: "models/end2end_quartznet_hls_layers.onnx"
model = ModelWrapper("/tmp/quartznet_hls_converted.onnx")

for n in model.graph.node:
    if n.op_type != "GenericPartition":
        continue
    path_to_partition = get_by_name(n.attribute, "model", "name").s.decode('utf-8')
    print(path_to_partition)

    # Prepare, compile and select cppsim execution for this partition, then
    # overwrite the partition file in place.
    model_partition = (
        ModelWrapper(path_to_partition)
        .transform(PrepareCppSim())
        .transform(CompileCppSim())
        .transform(SetExecMode("cppsim"))
    )
    model_partition.save(path_to_partition)

model.save("/tmp/quartznet_hls_converted_cppsim.onnx")

models/end2end_quartznet_hls_layers_partition_0.onnx




Process ForkPoolWorker-5:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/opt/conda/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/conda/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/opt/conda/lib/python3.6/multiprocessing/pool.py", line 44, in mapstar
    return list(map(*args))
  File "/workspace/finn/src/finn/transformation/fpgadataflow/prepare_rtlsim.py", line 69, in applyNodeLocal
    inst.prepare_rtlsim()


KeyboardInterrupt: 

## RTLsim


In [18]:
import time
from finn.util.basic import alveo_part_map, alveo_default_platform
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
    ReplaceVerilogRelPaths,
)
from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources

t1 = time.perf_counter()

# Target board and the part/platform looked up for it.
test_board = "U280"
test_platform = alveo_default_platform[test_board]
test_fpga_part = alveo_part_map[test_board]
target_clk_ns = 10

model = ModelWrapper("/tmp/quartznet_hls_converted.onnx")

# Zero-argument factories for the per-partition passes, in application order:
# naming clean-up first, then IP preparation, HLS synthesis, Verilog path
# fix-up and resource annotation.
ip_generation_passes = [
    GiveUniqueNodeNames,
    GiveRandomTensorNames,
    GiveReadableTensorNames,
    GiveUniqueParameterTensors,
    lambda: PrepareIP(test_fpga_part, target_clk_ns),
    HLSSynthIP,
    ReplaceVerilogRelPaths,
    lambda: AnnotateResources("hls"),
]

for n in model.graph.node:
    if n.op_type != "GenericPartition":
        continue
    path_to_partition = get_by_name(n.attribute, "model", "name").s.decode('utf-8')
    print(path_to_partition)

    model_partition = ModelWrapper(path_to_partition)
    for make_pass in ip_generation_passes:
        model_partition = model_partition.transform(make_pass())
    # Overwrite the partition file in place with the processed sub-model.
    model_partition.save(path_to_partition)

model.save("/tmp/quartznet_ipgen.onnx")

t2 = time.perf_counter() - t1
print("Elapsed time: {}".format(t2))

/tmp/finn_dev_mirza/partitioning_final/partition_0.onnx


Process ForkPoolWorker-17:
Process ForkPoolWorker-16:
Process ForkPoolWorker-20:
Process ForkPoolWorker-18:
Process ForkPoolWorker-21:
Process ForkPoolWorker-19:
Process ForkPoolWorker-15:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/opt/conda/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/opt/conda/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/opt/conda/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/conda/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/opt/conda/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File 

KeyboardInterrupt: 

In [24]:
# Took ~5 hours
# NOTE(review): `model_hls` is not assigned anywhere above this cell in the
# notebook as shown; the only visible assignment is in the following cell,
# which loads it from the very file written here. Presumably it came from a
# since-removed cell - confirm before relying on Restart & Run All.
model_hls.save("/tmp/test_partition1_cppsim.onnx")

# Inspect the saved model in Netron.
showInNetron("/tmp/test_partition1_cppsim.onnx")

Stopping http://0.0.0.0:8081
Serving '/tmp/test_partition1_cppsim.onnx' at http://0.0.0.0:8081


In [26]:
from finn.core.datatype import DataType
from finn.util.basic import gen_finn_dt_tensor
import finn.core.onnx_exec as oxe

# Reference partition and the saved cppsim model to compare against it.
model_onnx = ModelWrapper("/tmp/finn_dev_mirza/partitioning_final/partition_1.onnx")
model_hls = ModelWrapper("/tmp/test_partition1_cppsim.onnx")

onnx_inp_name = model_onnx.graph.input[0].name
hls_inp_name = model_hls.graph.input[0].name

# Random INT8 stimulus shaped like the reference model's first input.
inp_dtype = DataType.INT8
inp_shape = model_onnx.get_tensor_shape(onnx_inp_name)
x = gen_finn_dt_tensor(inp_dtype, inp_shape)
inp_dict = {onnx_inp_name: x}

# Both models must expose the same input name and shape for the shared
# input dictionary to be valid.
assert onnx_inp_name == hls_inp_name
assert inp_shape == model_hls.get_tensor_shape(hls_inp_name)

oxe.compare_execution(model_onnx, model_hls, inp_dict)




  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)


True

# Compare 2 models

1. Original QuartzNet
2. Any other


## Random input data

In [17]:
import numpy as np
from finn.core.modelwrapper import ModelWrapper
from finn.util.basic import gen_finn_dt_tensor
import finn.core.onnx_exec as oxe

import time
t1 = time.perf_counter()

################################################################################################
#### MODEL 1: original QuartzNet export used as the reference.
#### Other export locations used during development:
####   models/end2end_quartznet_export.onnx
####   /tmp/quartznet.onnx
####   /tmp/finn_dev_mirza/end2end_quartznet_export.onnx
model_1 = ModelWrapper("/workspace/finn/end2end_quartznet_export_dev.onnx")

# Create input data shaped like the model's first input.
input0_tensor_name = model_1.graph.input[0].name
input_shape = model_1.get_tensor_shape(input0_tensor_name)

# Random INT8 values stored as float32.
# np.random.randint excludes `high`, so high=128 is required to cover the
# full INT8 range [-128, 127] (high=127 would never produce 127).
# Alternative (random values of the model's own input datatype):
#   input_dtype = model_1.get_tensor_datatype(input0_tensor_name)
#   input_val = gen_finn_dt_tensor(input_dtype, input_shape)
input_val = np.random.randint(low=-128, high=128, size=input_shape).astype(np.float32)

input_dict = {}
input_dict[input0_tensor_name] = input_val
output0_tensor_name = model_1.graph.output[0].name

# Execute the reference model and keep its first output as the expected result.
expected_m1_dict = oxe.execute_onnx(model_1, input_dict, return_full_exec_context = False)
expected_m1 = expected_m1_dict[output0_tensor_name]
################################################################################################


t2 = time.perf_counter() - t1
print("Elapsed time: {}".format(t2))

Elapsed time: 341.2414430460194


## Golden input data

In [1]:
import numpy as np
from finn.core.modelwrapper import ModelWrapper
from finn.util.basic import gen_finn_dt_tensor
import finn.core.onnx_exec as oxe

import time
t1 = time.perf_counter()

# ---------------------------------------------------------------------------------------------
# MODEL 1: reference run on the stored ("golden") input.
# Leaves `input_val` (the input fed to the model) and `expected_m1` (value of the first graph
# output) in the notebook namespace for the cells that follow.
#
# Alternative model files that were used at some point:
#model_1 = ModelWrapper("models/end2end_quartznet_export.onnx") # original quartznet model (BASED on incorrect Brevitas branch)
#model_1 = ModelWrapper("/tmp/quartznet.onnx")
#model_1 = ModelWrapper("/tmp/finn_dev_mirza/end2end_quartznet_export.onnx")
# ---------------------------------------------------------------------------------------------
model_1 = ModelWrapper("/workspace/finn/end2end_quartznet_export_dev.onnx")

# Names of the first graph input and output; they key the execution dictionaries below.
input0_tensor_name = model_1.graph.input[0].name
output0_tensor_name = model_1.graph.output[0].name

# Load the stored input and keep only the first 256 entries along the last axis.
input_val = np.load("brevitas_reference/end2end_quartznet_input.npy")[:, :, 0:256]

input_dict = {input0_tensor_name: input_val}

expected_m1_dict = oxe.execute_onnx(model_1, input_dict, return_full_exec_context=False)
expected_m1 = expected_m1_dict[output0_tensor_name]

# Wall-clock time of the whole cell, in seconds.
t2 = time.perf_counter() - t1
print("Elapsed time: {}".format(t2))

Elapsed time: 359.29716781596653


In [2]:
# Print the stored reference predictions next to the ONNX execution result, one entry per line.
onnx_out = expected_m1

# Cut the reference along axis 1 to the length of the ONNX output before comparing.
onnx_out_length = np.shape(onnx_out)[1]
golden_out = np.load("brevitas_reference/end2end_quartznet_predictions.npy")[:, 0:onnx_out_length]

# Work on flat 1-D copies so both can be walked with a single index.
onnx_out = onnx_out.flatten()
golden_out = golden_out.flatten()

# Each line: reference value, tab, ONNX value.
for idx in range(len(golden_out)):
    print("{}\t{}".format(golden_out[idx], onnx_out[idx]))

# Strict equality check, currently disabled.
#assert(golden_out==onnx_out).all()

28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
22	22
28	28
28	28
1	1
28	28
28	28
14	14
28	28
28	28
9	9
28	28
28	28
20	20
28	28
28	28
25	25
28	28
28	28
28	28
0	0
0	0
28	28
1	1
14	14
4	4
28	4
0	0
0	0
28	28
22	22
28	28
28	28
5	5
28	28
28	28
28	28
28	28
24	24
28	28
28	28
28	28
28	28
1	1
28	28
28	28
28	28
20	20
9	9
9	9
28	28
15	15
14	14
28	28
28	28
0	0
0	0
0	0
28	28
28	28
15	15
6	6
28	28
28	28
0	28
0	0
28	28
28	28
19	19
19	19
28	28
16	16
28	28
28	28
28	28
28	28
1	1
28	28
28	28
14	4
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28
28	28


In [18]:
import time
t1 = time.perf_counter()

# ---------------------------------------------------------------------------------------------
# MODEL 2: run a second model on the MODEL 1 input and require identical outputs.
# Relies on `input_val` and `expected_m1` left behind by a MODEL 1 cell.
# ---------------------------------------------------------------------------------------------
model_2 = ModelWrapper("/tmp/quartznet_streamlined.onnx") # CORRECT (new inferred datatypes)
# Other candidates, with the result noted when they were tried:
#model_2 = ModelWrapper("/tmp/quartznet_streamlined_partitioned.onnx") #CORRECT
#model_2 = ModelWrapper("/tmp/quartznet_streamlined_lowered.onnx") #CORRECT
#model_2 = ModelWrapper("/tmp/quartznet_temp_test.onnx") #CORRECT?
#model_2 = ModelWrapper("/tmp/quartznet_4d_topk.onnx") # CORRECT (new)

#model_2 = ModelWrapper("/tmp/test_quartznet_mulremoved.onnx") # CORRECT (new)

# MODEL 2 is fed the MODEL 1 data with one extra trailing axis of size 1.
m1_input_val = input_val
m2_input_val = m1_input_val.reshape(m1_input_val.shape + (1,))

input0_tensor_name = model_2.graph.input[0].name
output0_tensor_name = model_2.graph.output[0].name
#input_shape = model_2.get_tensor_shape(input0_tensor_name)
#input_dtype = model_2.get_tensor_datatype(input0_tensor_name)
input_dict = {input0_tensor_name: m2_input_val}

expected_m2_dict = oxe.execute_onnx(model_2, input_dict, return_full_exec_context=False)
expected_m2 = expected_m2_dict[output0_tensor_name]

# Bring MODEL 2's output and input back to MODEL 1's shapes for an element-wise comparison.
expected_m2 = np.reshape(expected_m2, np.shape(expected_m1))
m2_input_val = m2_input_val.reshape(m1_input_val.shape)

# The input check only confirms the reshape round-trip; the output check is the real comparison.
assert np.all(m1_input_val == m2_input_val)
assert np.all(expected_m1 == expected_m2)

# Wall-clock time of the whole cell, in seconds.
t2 = time.perf_counter() - t1
print("Elapsed time: {}".format(t2))

  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype

  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype

  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype

  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype

  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype

  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype

  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype

Elapsed time: 781.0633141910075


In [None]:
# Print the outputs of the two models side by side, one entry per line:
# MODEL 1 value, tab, MODEL 2 value.
# Formatting `expected_m1` / `expected_m2` themselves inside the loop would print both complete
# arrays on every iteration, so both are flattened and walked in step instead.
for m1_val, m2_val in zip(expected_m1.flatten(), expected_m2.flatten()):
    print("{}\t{}".format(m1_val, m2_val))