In [1]:
# To get onnx file, run in terminal:
# python setup.py test --addopts "-k test_brevitas_quartznet" 
import numpy as np
from finn.util.visualization import showInNetron
from finn.core.modelwrapper import ModelWrapper

file_name = '/tmp/quartznet.onnx'
showInNetron(file_name)

Serving '/tmp/quartznet.onnx' at http://0.0.0.0:8081


# Construct Conv+Mul+Mt node in QuartzNet

In [3]:
import onnx
from finn.util.basic import gen_finn_dt_tensor
from finn.core.datatype import DataType
from finn.util.basic import get_by_name


def set_all_initializers(model):
    """ Sets all initializers of the graph to a random value. """
    for n in model.graph.node:
        if len(n.input) > 1:
            init_name = n.input[1]
            init_shape = model.get_tensor_shape(init_name)
            init_val = gen_finn_dt_tensor(DataType.FLOAT32, init_shape)
            model.set_initializer(init_name, init_val)

Conv1_node = onnx.helper.make_node(
    "Conv",
    inputs=['in1_conv1', 'in2_conv1'],
    outputs=['out1_conv1'],
    name="Conv1",
    dilations=[1],
    group=256,
    kernel_shape=[33],
    pads=[16, 16],
    strides=[1]
)

Mul1_node = onnx.helper.make_node(
    "Mul",
    inputs=['out1_conv1', 'in2_mul1'],
    outputs=['out1_mul1']
)

MultiThreshold1_node = onnx.helper.make_node(
    "MultiThreshold",
    inputs=['out1_mul1', 'in2_mt1'],
    outputs=['out1_mt1'],
    out_dtype='UINT4'
)


# Inputs
in1_conv1 = onnx.helper.make_tensor_value_info("in1_conv1", onnx.TensorProto.FLOAT, [1, 256, 128])
out1_add7 = onnx.helper.make_tensor_value_info("out1_mt1", onnx.TensorProto.FLOAT, [1, 256, 128])

# Value infos
out1_conv1 = onnx.helper.make_tensor_value_info("out1_conv1", onnx.TensorProto.FLOAT, [1, 256, 128])
out1_mul1 = onnx.helper.make_tensor_value_info("out1_mul1", onnx.TensorProto.FLOAT, [1, 256, 128])

# Initializers
in2_conv1 = onnx.helper.make_tensor_value_info("in2_conv1", onnx.TensorProto.FLOAT, [256, 1, 33])
in2_mul1 = onnx.helper.make_tensor_value_info("in2_mul1", onnx.TensorProto.FLOAT, [1])
in2_mt1 = onnx.helper.make_tensor_value_info("in2_mt1", onnx.TensorProto.FLOAT, [256, 15])

# Graph
graph = onnx.helper.make_graph(
    nodes=[
        Conv1_node,
        Mul1_node,
        MultiThreshold1_node
    ],
    name="test_graph",
    inputs=[in1_conv1],
    outputs=[out1_add7],
    value_info=[
        out1_conv1,
        out1_mul1,
        in2_conv1,
        in2_mul1, 
        in2_mt1
    ]
)

onnx_model = onnx.helper.make_model(graph, producer_name="4d_conversion_test-model")
model = ModelWrapper(onnx_model)
set_all_initializers(model)
model.save("/tmp/conv_mul_mt_graph.onnx")

## 3D to 4D
from finn.transformation.change_3d_tensors_to_4d import Change3DTo4DTensors
model = ModelWrapper("/tmp/conv_mul_mt_graph.onnx")
model = model.transform(Change3DTo4DTensors())
model.save("/tmp/conv_mul_mt_graph_4d.onnx")

## Streamline
from finn.transformation.streamline.absorb import AbsorbMulIntoMultiThreshold
model = ModelWrapper("/tmp/conv_mul_mt_graph_4d.onnx")
model = model.transform(AbsorbMulIntoMultiThreshold())
model.save("/tmp/conv_mul_mt_graph_streamlined.onnx")

## Lowering
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
model = ModelWrapper("/tmp/conv_mul_mt_graph_streamlined.onnx")
model = model.transform(LowerConvsToMatMul())
model.save("/tmp/conv_mul_mt_graph_lowered.onnx")

## Convert to HLS
#from finn.transformation.fpgadataflow.convert_to_hls_layers import InferConvInpGen
#model = ModelWrapper("/tmp/conv_mul_mt_graph_lowered.onnx")
#model = model.transform(InferConvInpGen())
#model.save("/tmp/conv_mul_mt_graph_hls.onnx")

In [None]:
file1 = "/tmp/conv_mul_mt_graph.onnx"
file2 = "/tmp/conv_mul_mt_graph_4d.onnx"
file3 = "/tmp/conv_mul_mt_graph_streamlined.onnx"
file4 = "/tmp/conv_mul_mt_graph_lowered.onnx"
#ile5 = "/tmp/conv_mul_mt_graph_hls.onnx"

showInNetron(file4)

# Construct FMPadding_Batch + ConvInputGenerator

In [7]:
import onnx
from finn.util.basic import gen_finn_dt_tensor
from finn.core.datatype import DataType
from finn.util.basic import get_by_name

model=ModelWrapper("/tmp/conv_mul_mt_graph_lowered.onnx")

node_ind=0
for n in model.graph.node:
    node_ind += 1
    if n.op_type=='Im2Col':
        padding_node = onnx.helper.make_node(
        "FMPadding_Batch",
        [n.input[0]],
        [n.output[0]],
        domain="finn.custom_op.fpgadataflow",
        backend="fpgadataflow",
        ImgDim=100,
        Padding=1,
        NumChannels=1,
        inputDataType=onnx.TensorProto.FLOAT,
        SIMD=1
        )
        #graph.node.remove(n)
        graph.node.insert(node_ind, padding_node)

for n in model.graph.node:
    print(n.op_type)
model.save("/tmp/conv_mul_mt_graph_hls.onnx")
showInNetron("/tmp/conv_mul_mt_graph_hls.onnx")

Transpose
Im2Col
MatMul
Transpose
MultiThreshold

Stopping http://0.0.0.0:8081
Serving '/tmp/conv_mul_mt_graph_hls.onnx' at http://0.0.0.0:8081


# Extracting repetitive part

In [2]:
import numpy as np
from finn.util.visualization import showInNetron
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.general import *
import copy

model = ModelWrapper("/tmp/quartznet.onnx")
model = model.transform(GiveUniqueNodeNames())

model.save("/tmp/quartznet_uniqueNames.onnx")
#showInNetron("/tmp/quartznet_uniqueNames.onnx")

list_of_repetitions = []
for idx, n in enumerate(model.graph.node):
    is_fork = model.is_fork_node(n)
    if is_fork is True:
        rep = {}
        start_node_id = idx
        rep[n.name] = idx
        for idx, n in enumerate(model.graph.node):
            if idx<=start_node_id:
                continue
            is_join = model.is_join_node(n)
            if is_join is True:
                end_node_id = idx+1 # one node after the join node
                end_node_name = model.graph.node[end_node_id].name
                rep[end_node_name] = end_node_id
                list_of_repetitions.append(rep)
                break
        
print("{}".format(list_of_repetitions))

rep_structure = list_of_repetitions[0] #0-14
start_end = []
for v in rep_structure.values():
    start_end.append(v)
print(start_end)
start_node_id = start_end[0]
end_node_id = start_end[1]

nodes = copy.deepcopy(model.graph.node)
for idx, n in enumerate(nodes):
    if idx<start_node_id or idx>end_node_id:
        model.graph.node.remove(n)

model.transform(RemoveUnusedTensors())
        
model.save("/tmp/quartznet_repetitive_nodes.onnx")
showInNetron("/tmp/quartznet_repetitive_nodes.onnx")

[{'Mul_3': 9, 'MultiThreshold_13': 68}, {'Mul_26': 69, 'MultiThreshold_25': 128}, {'Mul_49': 129, 'MultiThreshold_37': 188}, {'Mul_72': 189, 'MultiThreshold_49': 248}, {'Mul_95': 249, 'MultiThreshold_61': 308}, {'Mul_118': 309, 'MultiThreshold_73': 368}, {'Mul_141': 369, 'MultiThreshold_85': 428}, {'Mul_164': 429, 'MultiThreshold_97': 488}, {'Mul_187': 489, 'MultiThreshold_109': 548}, {'Mul_210': 549, 'MultiThreshold_121': 608}, {'Mul_233': 609, 'MultiThreshold_133': 668}, {'Mul_256': 669, 'MultiThreshold_145': 728}, {'Mul_279': 729, 'MultiThreshold_157': 788}, {'Mul_302': 789, 'MultiThreshold_169': 848}, {'Mul_325': 849, 'MultiThreshold_181': 908}]
[9, 68]
Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_repetitive_nodes.onnx' at http://0.0.0.0:8081


In [4]:
from onnx import TensorProto
from onnx import helper as oh
import onnx

model = ModelWrapper("/tmp/quartznet_repetitive_nodes.onnx")

in_tensor=onnx.helper.make_tensor_value_info("in_tensor", onnx.TensorProto.FLOAT, [1, 256, 128])
out_tensor=onnx.helper.make_tensor_value_info("out_tensor", onnx.TensorProto.FLOAT, [1, 256, 128])

old_input = model.graph.input[0]
model.graph.input.remove(old_input)
model.graph.input.extend([in_tensor])

old_output = model.graph.output[0]
model.graph.output.remove(old_output)
model.graph.output.extend([out_tensor])

input_node = model.graph.node[0]
input_node.input[0] = 'in_tensor'

output_node = model.graph.node[-1]
output_node.output[0] = 'out_tensor'

##### To have unique initializers:
#list_of_initializers = []
#for n in model.graph.node:
#    init_name = n.input[1]
#    if init_name in list_of_initializers:
#        init_val = model.get_initializer(n.input[1])
#        init_shape = np.shape(init_val)
#        if len(init_shape) < 1:
#            init_shape = (1,)       
#        new_init = oh.make_tensor_value_info(
#            model.make_new_valueinfo_name(), TensorProto.FLOAT, init_shape
#        )
#        model.graph.value_info.append(new_init)
#        model.set_initializer(new_init.name, init_val)
#        n.input[1] = new_init.name
#    else:
#        list_of_initializers.append(n.input[1])
#####

from finn.transformation.general import GiveRandomTensorNames, GiveReadableTensorNames
model = model.transform(GiveRandomTensorNames())
model = model.transform(GiveReadableTensorNames())

model.save("/tmp/quartznet_repetitive_nodes_subpart.onnx")
showInNetron("/tmp/quartznet_repetitive_nodes_subpart.onnx")

Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_repetitive_nodes_subpart.onnx' at http://0.0.0.0:8081


# For running tests

In [48]:
from finn.transformation.general import GiveRandomTensorNames, GiveReadableTensorNames, GiveUniqueNodeNames
model = ModelWrapper("/tmp/quartznet.onnx")
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveRandomTensorNames())
model = model.transform(GiveReadableTensorNames())
model.save("/tmp/quartznet_uniqueNames.onnx")

In [4]:
# Add timer and test streamlining over complete QuartzNet (if even possible)
from finn.transformation.streamline import *
from finn.transformation.streamline.reorder import MoveMulPastDWConv, MoveLinearPastEltwiseAdd, MoveMulPastFork
from finn.transformation.change_3d_tensors_to_4d import Change3DTo4DTensors
from finn.util.basic import get_by_name
from finn.core.datatype import DataType
import time

model = ModelWrapper("/tmp/quartznet_uniqueNames.onnx")

## Convert to float128 to prevent overflow error 
#### However... the warning is still present..
for n in model.graph.node:
    if len(n.input)>1:
        init_val = model.get_initializer(n.input[1])
        if init_val is None:
            continue
        else:
            old_dtype = init_val.dtype
            init_val = init_val.astype(np.float64, casting='safe')
            model.set_initializer(n.input[1], init_val)
            new_dtype = model.get_initializer(n.input[1]).dtype

            
start_time = time.perf_counter()
###############################################

model = model.transform(Change3DTo4DTensors())
model = model.transform(BatchNormToAffine())
model = model.transform(MoveAddPastMul())
model = model.transform(MoveAddPastConv())
model = model.transform(MoveAddPastMul())
model = model.transform(MoveMulPastFork())
model = model.transform(MoveScalarMulPastConv())
model = model.transform(MoveMulPastDWConv())
model = model.transform(MoveLinearPastEltwiseAdd())
model = model.transform(CollapseRepeatedAdd()) # (output node datatype is set to FLOAT32 by default here)
model = model.transform(CollapseRepeatedMul()) # (output node datatype is set to FLOAT32 by default here)
model = model.transform(AbsorbAddIntoMultiThreshold())
model = model.transform(FactorOutMulSignMagnitude())
model = model.transform(Absorb1BitMulIntoConv())
model = model.transform(AbsorbMulIntoMultiThreshold())

## Add quantization annotation to ensure RoundAndClipThresholds works
for n in model.graph.node:
    if n.op_type=="MultiThreshold":
        odtype = get_by_name(n.attribute, "out_dtype", name_field="name").s.decode("utf-8")
        dtype = getattr(DataType, odtype) 
        
        # Set tensor datatype equal to expected output datatype
        #model.set_tensor_datatype(n.input[0], dtype)
        model.set_tensor_datatype(n.input[0], DataType.INT16)

model = model.transform(RoundAndClipThresholds())

#model = model.transform(LowerConvsToMatMul())

#model = model.transform(AbsorbTransposeIntoMultiThreshold())

###############################################
elapsed_time = time.perf_counter() - start_time


print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

  Tnew = T / A.reshape(-1, 1)


313.2522694749641


In [5]:
showInNetron("/tmp/quartznet_uniqueNames_streamlined.onnx")


Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_uniqueNames_streamlined.onnx' at http://0.0.0.0:8081


In [1]:
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul

model = ModelWrapper("/tmp/quartznet_uniqueNames_streamlined_lowered105.onnx")

for i in range(0,5):
    model = model.transform(LowerConvsToMatMul(), make_deepcopy=False, cleanup=True)

model.save("/tmp/quartznet_uniqueNames_streamlined_lowered110.onnx")

False
False
False
False
False


In [3]:
from finn.util.visualization import showInNetron
showInNetron("/tmp/quartznet_uniqueNames_streamlined_lowered90.onnx")



Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_uniqueNames_streamlined_lowered90.onnx' at http://0.0.0.0:8081


----------------------------------------
Exception happened during processing of request from ('172.17.0.1', 58744)
Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/socketserver.py", line 654, in process_request_thread
    self.finish_request(request, client_address)
  File "/opt/conda/lib/python3.6/socketserver.py", line 364, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/opt/conda/lib/python3.6/socketserver.py", line 724, in __init__
    self.handle()
  File "/opt/conda/lib/python3.6/http/server.py", line 418, in handle
    self.handle_one_request()
  File "/opt/conda/lib/python3.6/http/server.py", line 406, in handle_one_request
    method()
  File "/opt/conda/lib/python3.6/site-packages/netron/server.py", line 108, in do_GET
    self.handler()
  File "/opt/conda/lib/python3.6/site-packages/netron/server.py", line 105, in handler
    self.wfile.write(buffer)
  File "/opt/conda/lib/python3.6/socketserver.py", line 803, in wri

In [57]:
start_time = time.perf_counter()
model = model.transform(BatchNormToAffine())
elapsed_time = time.perf_counter() - start_time
print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

14.575736945000244


In [58]:
start_time = time.perf_counter()
model = model.transform(MoveAddPastMul())
elapsed_time = time.perf_counter() - start_time
print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

15.949343863001559


In [59]:
start_time = time.perf_counter()
model = model.transform(MoveAddPastConv())
elapsed_time = time.perf_counter() - start_time
print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

23.04196755500743


In [60]:
start_time = time.perf_counter()
model = model.transform(MoveAddPastMul())
elapsed_time = time.perf_counter() - start_time
print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

16.880800654005725


In [61]:
start_time = time.perf_counter()
model = model.transform(MoveMulPastFork())
elapsed_time = time.perf_counter() - start_time
print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

13.69963248699787


In [62]:
start_time = time.perf_counter()
model = model.transform(MoveScalarMulPastConv())
elapsed_time = time.perf_counter() - start_time
print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

25.285983692010632


In [63]:
start_time = time.perf_counter()
model = model.transform(MoveMulPastDWConv())
elapsed_time = time.perf_counter() - start_time
print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

17.653810796007747


In [64]:
start_time = time.perf_counter()
model = model.transform(MoveLinearPastEltwiseAdd())
elapsed_time = time.perf_counter() - start_time
print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

17.26131905199145


In [65]:
start_time = time.perf_counter()
model = model.transform(CollapseRepeatedAdd())
elapsed_time = time.perf_counter() - start_time
print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

12.518790405010805


In [66]:
start_time = time.perf_counter()
model = model.transform(CollapseRepeatedMul())
elapsed_time = time.perf_counter() - start_time
print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

12.648896538012195


In [67]:
start_time = time.perf_counter()
model = model.transform(AbsorbAddIntoMultiThreshold())
elapsed_time = time.perf_counter() - start_time
print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

2.107187604997307


In [68]:
start_time = time.perf_counter()
model = model.transform(FactorOutMulSignMagnitude())
elapsed_time = time.perf_counter() - start_time
print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

1.854793280013837


In [69]:
start_time = time.perf_counter()
model = model.transform(Absorb1BitMulIntoConv())
elapsed_time = time.perf_counter() - start_time
print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

64.46866467001382


In [70]:
start_time = time.perf_counter()
model = model.transform(AbsorbMulIntoMultiThreshold())
elapsed_time = time.perf_counter() - start_time
print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

  Tnew = T / A.reshape(-1, 1)


1.7690469119988848


In [71]:
start_time = time.perf_counter()
model = model.transform(RoundAndClipThresholds())
elapsed_time = time.perf_counter() - start_time
print("{}".format(elapsed_time))

model.save("/tmp/quartznet_uniqueNames_streamlined.onnx")

0.6426278399885632


# Apply streamlining

In [3]:
from finn.transformation.streamline import *
from finn.transformation.streamline.reorder import MoveMulPastDWConv, MoveLinearPastEltwiseAdd, MoveMulPastFork
from finn.transformation.change_3d_tensors_to_4d import Change3DTo4DTensors
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.streamline.absorb import AbsorbConsecutiveTransposes # No effect (only on consecutive transpose nodes)
from finn.transformation.streamline.absorb import AbsorbTransposeIntoMultiThreshold
from finn.util.basic import get_by_name
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
from finn.util.visualization import showInNetron
from finn.transformation.general import GiveRandomTensorNames, GiveReadableTensorNames, GiveUniqueParameterTensors

model = ModelWrapper("/tmp/quartznet_repetitive_nodes_subpart.onnx")

model = model.transform(GiveRandomTensorNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(GiveUniqueParameterTensors())

# Convert to supported format
model = model.transform(Change3DTo4DTensors())

# Collapse BatchNorm to Add and Mul
model = model.transform(BatchNormToAffine())

# Group additions
model = model.transform(MoveAddPastMul())
model = model.transform(MoveAddPastConv())
model = model.transform(MoveAddPastMul())

# Group multiplications
#### Move mul past fork
model = model.transform(MoveMulPastFork())
model = model.transform(MoveScalarMulPastConv())
model = model.transform(MoveMulPastDWConv())

# Move Mul/Add past join node
model = model.transform(MoveLinearPastEltwiseAdd())

# Collapes additions & multiplications
model = model.transform(CollapseRepeatedAdd())
model = model.transform(CollapseRepeatedMul())

# Absorb Add/Mul into multithreshold
model = model.transform(AbsorbAddIntoMultiThreshold())
model = model.transform(FactorOutMulSignMagnitude())
model = model.transform(Absorb1BitMulIntoConv())
model = model.transform(AbsorbMulIntoMultiThreshold())

# Ensure thresholds are integers
## Add quantization annotation to ensure RoundAndClipThresholds works
for n in model.graph.node:
    if n.op_type=="MultiThreshold":
        odtype = get_by_name(n.attribute, "out_dtype", name_field="name").s.decode("utf-8")
        dtype = getattr(DataType, odtype) 
        #model.set_tensor_datatype(n.input[0], dtype)
        model.set_tensor_datatype(n.input[0], DataType.INT32)

#from finn.transformation.infer_datatypes import InferDataTypes
#model = model.transform(InferDataTypes())
        
model = model.transform(RoundAndClipThresholds())

model = model.transform(LowerConvsToMatMul())

model = model.transform(AbsorbTransposeIntoMultiThreshold())


model.save("/tmp/quartznet_repetitive_nodes_subpart_streamlined.onnx")
showInNetron("/tmp/quartznet_repetitive_nodes_subpart_streamlined.onnx")

Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_repetitive_nodes_subpart_streamlined.onnx' at http://0.0.0.0:8081


In [2]:
import finn.core.onnx_exec as oxe
from finn.util.basic import *

def generate_random_input(model):
    """ Creates input dictionary with a random numpy array that matches the input tensor shape """
    i_shape = []
    input_dict={}
    for i in range(len(model.graph.input)):
        input_node = model.graph.input[i]
        input_node_name = input_node.name
        input_node_shape = model.get_tensor_shape(input_node_name)

        #i_val = gen_finn_dt_tensor(DataType.FLOAT32, input_node_shape)
        i_val = np.random.randint(0, 128, input_node_shape).astype(np.float32) #float32 is expected input
        
        input_dict[input_node_name] = i_val
    return input_dict

model = ModelWrapper("/tmp/quartznet_repetitive_nodes_subpart.onnx")
input_dict = generate_random_input(model)
print(input_dict)
output_node_name = model.graph.output[0].name
output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
expected_old = output_dict[output_node_name]

print("{}\n{}".format(np.shape(expected_old),expected_old))


{'global_in': array([[[ 39., 107.,   9., ...,  80.,  37., 104.],
        [  0., 100.,  21., ..., 105.,  61.,  99.],
        [ 68.,   2.,  58., ...,  25.,  77.,  34.],
        ...,
        [ 53.,  43., 111., ...,  23.,  70., 112.],
        [ 64.,  62.,  15., ...,  23., 105.,  74.],
        [ 30.,  27.,  35., ...,  91., 123.,  31.]]], dtype=float32)}
(1, 256, 128)
[[[15. 15. 15. ... 15. 15. 15.]
  [15.  0.  0. ...  0.  0.  0.]
  [15.  0. 15. ... 15.  0. 15.]
  ...
  [ 0.  0.  0. ...  0.  0. 15.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [15. 15. 15. ... 15. 15. 15.]]]


In [3]:
old_input_name = model.graph.input[0].name
old_input_dict_val = input_dict[old_input_name]

model = ModelWrapper("/tmp/quartznet_repetitive_nodes_subpart_streamlined.onnx")

input_dict_val = old_input_dict_val
if len(np.shape(input_dict_val))<4:
    input_dict_val = np.reshape(input_dict_val, np.shape(input_dict_val)+(1,))
    
input_dict[model.graph.input[0].name] = input_dict_val

assert (input_dict_val==np.reshape(old_input_dict_val,np.shape(input_dict_val))).all()

output_node_name = model.graph.output[0].name
output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
expected_new = output_dict[output_node_name]

expected_new = np.reshape(expected_new, np.shape(expected_old))

print("{}\n{}".format(np.shape(expected_new),expected_new))

assert(expected_old==expected_new).all()



  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)
  "FINN datatype.".format(tensor, dtype)


(1, 256, 128)
[[[15. 15. 15. ... 15. 15. 15.]
  [15.  0.  0. ...  0.  0.  0.]
  [15.  0. 15. ... 15.  0. 15.]
  ...
  [ 0.  0.  0. ...  0.  0. 15.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [15. 15. 15. ... 15. 15. 15.]]]


# Compare results before and after convolution lowering (and 3D to 4D transform)

In [14]:
import finn.core.onnx_exec as oxe
from finn.util.basic import *

model = ModelWrapper("/tmp/quartznet_repetitive_nodes_subpart.onnx")

def generate_random_input(model):
    """ Creates input dictionary with a random numpy array that matches the input tensor shape """
    i_shape = []
    input_dict={}
    for i in range(len(model.graph.input)):
        input_node = model.graph.input[i]
        input_node_name = input_node.name
        input_node_shape = model.get_tensor_shape(input_node_name)

        i_val = gen_finn_dt_tensor(DataType.FLOAT32, input_node_shape)
        input_dict[input_node_name] = i_val*100
    return input_dict

input_dict = generate_random_input(model)
print(input_dict)

output_node_name = model.graph.output[0].name
output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
expected_old = output_dict[output_node_name]

print("{}\n{}".format(np.shape(expected_old),expected_old))

showInNetron("/tmp/quartznet_repetitive_nodes_subpart.onnx")

{'in_tensor': array([[[  24.729836  ,  -40.49649   ,   34.416332  , ...,
          235.03418   ,    4.5277877 ,   92.97527   ],
        [  79.53794   ,  -40.87957   ,   73.34267   , ...,
           22.228552  ,   84.4919    ,   28.186167  ],
        [  98.58585   ,  -57.142715  ,  -31.651863  , ...,
           -1.5053351 , -156.42383   ,  -83.71322   ],
        ...,
        [  37.764263  ,   87.98556   ,   85.11785   , ...,
          -60.03188   ,  201.65121   ,  -35.476322  ],
        [-163.42583   , -100.1717    ,  227.94485   , ...,
         -143.96017   ,  -10.710757  ,  -60.51425   ],
        [  66.505356  ,   -0.40468973, -132.48183   , ...,
           10.493427  ,   27.054792  ,   -6.463161  ]]], dtype=float32)}
(1, 256, 128)
[[[15. 15. 15. ... 15. 15. 15.]
  [ 0.  0. 15. ... 15. 15.  0.]
  [15.  0. 15. ...  0. 15.  0.]
  ...
  [ 0. 15. 15. ... 15. 15. 15.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 0. 15. 15. ... 15. 15. 15.]]]

Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_rep

In [15]:
from finn.transformation.change_3d_tensors_to_4d import Change3DTo4DTensors

model = model.transform(Change3DTo4DTensors())
#for k,v in input_dict.items():
#    old_in_name = k
#    old_in_val = v
#    old_shape = np.shape(v)
#    new_in_name = model.graph.input[0].name
#    new_shape = old_shape + (1,)
#new_in_val = np.reshape(v, new_shape)
#del input_dict[old_in_name]
#input_dict[new_in_name] = new_in_val
input_dict_val = input_dict['global_in']
input_dict_val = np.reshape(input_dict_val, np.shape(input_dict_val)+(1,))
input_dict[model.graph.input[0].name] = input_dict_val

output_node_name = model.graph.output[0].name
output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
expected_new = output_dict[output_node_name]

expected_new = np.reshape(expected_new, np.shape(expected_old))

print("{}\n{}".format(np.shape(expected_new),expected_new))

assert(expected_old==expected_new).all()

model.save("/tmp/quartznet_subpart_4d.onnx")
showInNetron("/tmp/quartznet_subpart_4d.onnx")

(1, 256, 128, 1)
(1, 256, 128)
[[[15. 15. 15. ... 15. 15. 15.]
  [ 0.  0. 15. ... 15. 15.  0.]
  [15.  0. 15. ...  0. 15.  0.]
  ...
  [ 0. 15. 15. ... 15. 15. 15.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 0. 15. 15. ... 15. 15. 15.]]]

Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_subpart_4d.onnx' at http://0.0.0.0:8081


In [16]:
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul

model = ModelWrapper("/tmp/quartznet_subpart_4d.onnx")
model = model.transform(LowerConvsToMatMul())

output_node_name = model.graph.output[0].name
output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
expected_lowered = output_dict[output_node_name]

expected_lowered = np.reshape(expected_lowered, np.shape(expected_old))

print("{}\n{}".format(np.shape(expected_lowered),expected_lowered))

assert(expected_lowered==expected_new).all()

model.save("/tmp/quartznet_subpart_lowered.onnx")
showInNetron("/tmp/quartznet_subpart_lowered.onnx")

(1, 256, 128)
[[[15. 15. 15. ... 15. 15. 15.]
  [ 0.  0. 15. ... 15. 15.  0.]
  [15.  0. 15. ...  0. 15.  0.]
  ...
  [ 0. 15. 15. ... 15. 15. 15.]
  [ 0.  0.  0. ...  0.  0.  0.]
  [ 0. 15. 15. ... 15. 15. 15.]]]

Stopping http://0.0.0.0:8081
Serving '/tmp/quartznet_subpart_lowered.onnx' at http://0.0.0.0:8081
