In [3]:
import numpy as np

from onnx import TensorProto, helper

import finn.core.onnx_exec as oxe
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.transformation.general import GiveUniqueNodeNames
from finn.util.basic import gen_finn_dt_tensor

from finn.custom_op.registry import getCustomOp
from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
from finn.analysis.fpgadataflow.res_estimation import res_estimation
from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
from finn.custom_op.general.im2col import compute_conv_output_dim


def make_single_im2col_modelwrapper(
    k_h, k_w, ifm_ch, ifm_dim_h, ifm_dim_w, ofm_dim_h, ofm_dim_w,
    simd, stride_h, stride_w, dilation, idt
):
    """Build a single-node ONNX model containing one ``Im2Col`` op.

    The model has one input ``"inp"`` of shape
    ``[1, ifm_dim_h, ifm_dim_w, ifm_ch]`` and one output ``"outp"`` of shape
    ``[1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch]``. No padding is applied
    (``pad_amount`` is all zeros).

    ``simd`` is accepted but not used by this builder.

    Returns:
        A ``ModelWrapper`` whose input and output tensors are both annotated
        with the datatype ``idt``.
    """
    in_shape = (1, ifm_dim_h, ifm_dim_w, ifm_ch)
    out_shape = (1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch)

    # Assume dilation of 1 along 'dummy' dimension always: when the height is
    # degenerate (kernel, input and output height all 1) only the width dilates.
    height_is_dummy = k_h == 1 and ifm_dim_h == 1 and ofm_dim_h == 1
    dilations = [1 if height_is_dummy else dilation, dilation]

    node = helper.make_node(
        "Im2Col",
        ["inp"],
        ["outp"],
        domain="finn.custom_op.general",
        backend="fpgadataflow",
        stride=[stride_h, stride_w],
        kernel_size=[k_h, k_w],
        input_shape=str(in_shape),
        dilations=dilations,
        pad_amount=[0, 0, 0, 0],
        pad_value=0,
    )

    graph = helper.make_graph(
        nodes=[node],
        name="im2col_graph",
        inputs=[
            helper.make_tensor_value_info("inp", TensorProto.FLOAT, list(in_shape))
        ],
        outputs=[
            helper.make_tensor_value_info("outp", TensorProto.FLOAT, list(out_shape))
        ],
    )

    wrapped = ModelWrapper(helper.make_model(graph, producer_name="im2col-model"))

    # The op only rearranges values, so the output keeps the input datatype.
    for tensor_name in ("inp", "outp"):
        wrapped.set_tensor_datatype(tensor_name, idt)

    return wrapped


def make_single_slidingwindow_modelwrapper(
    k_h, k_w, ifm_ch, ifm_dim_h, ifm_dim_w, ofm_dim_h, ofm_dim_w, simd,
    stride_h, stride_w, dilation, idt, dw=0
):
    """Build a single-node ONNX model containing one ``ConvolutionInputGenerator1D``.

    The model has one input ``"inp"`` of shape
    ``[1, ifm_dim_h, ifm_dim_w, ifm_ch]`` and one output ``"outp"`` of shape
    ``[1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch]``.

    ``simd`` and ``dw`` are forwarded to the node as its ``SIMD`` and
    ``depthwise`` attributes.

    Returns:
        A ``ModelWrapper`` whose input and output tensors are both annotated
        with the datatype ``idt``.
    """
    in_shape = [1, ifm_dim_h, ifm_dim_w, ifm_ch]
    out_shape = [1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch]

    # Assume dilation of 1 along 'dummy' dimension always: when the height is
    # degenerate (kernel, input and output height all 1) only the width dilates.
    height_is_dummy = k_h == 1 and ifm_dim_h == 1 and ofm_dim_h == 1
    dilation_hw = [1 if height_is_dummy else dilation, dilation]

    swg_node = helper.make_node(
        "ConvolutionInputGenerator1D",
        ["inp"],
        ["outp"],
        domain="finn.custom_op.fpgadataflow",
        backend="fpgadataflow",
        ConvKernelDim=[k_h, k_w],
        IFMChannels=ifm_ch,
        IFMDim=[ifm_dim_h, ifm_dim_w],
        OFMDim=[ofm_dim_h, ofm_dim_w],
        SIMD=simd,
        Stride=[stride_h, stride_w],
        Dilation=dilation_hw,
        # Input and output share one datatype: the node gets idt.name for both.
        inputDataType=idt.name,
        outputDataType=idt.name,
        depthwise=dw,
    )

    graph = helper.make_graph(
        nodes=[swg_node],
        name="slidingwindow_graph",
        inputs=[helper.make_tensor_value_info("inp", TensorProto.FLOAT, in_shape)],
        outputs=[helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_shape)],
    )

    wrapped = ModelWrapper(
        helper.make_model(graph, producer_name="slidingwindow-model")
    )

    for tensor_name in ("inp", "outp"):
        wrapped.set_tensor_datatype(tensor_name, idt)

    return wrapped


def prepare_inputs(input_tensor):
    """Return the execution input dict mapping the graph input ``"inp"`` to *input_tensor*."""
    return dict(inp=input_tensor)


def test_fpgadataflow_slidingwindow(
    idt, k_h, k_w, ifm_dim_h, ifm_dim_w, ifm_ch, stride_h, stride_w, dilation, exec_mode, simd, dw
):
    """Run a ConvolutionInputGenerator1D node and check it against an Im2Col reference.

    A random input tensor of datatype ``idt`` and shape
    ``(1, ifm_dim_h, ifm_dim_w, ifm_ch)`` is fed to both a single-node
    sliding-window model (executed via cppsim or rtlsim) and a single-node
    Im2Col model; the two outputs must match element-wise. In rtlsim mode the
    measured cycle count is additionally compared with the analytical estimate
    and the estimated/HLS resource figures are printed.

    Args:
        idt: FINN datatype used for input and output tensors.
        k_h, k_w: kernel height and width.
        ifm_dim_h, ifm_dim_w: input feature-map height and width.
        ifm_ch: number of input channels.
        stride_h, stride_w: strides along height and width.
        dilation: dilation value passed to both model builders.
        exec_mode: ``"cppsim"`` or ``"rtlsim"``.
        simd: SIMD attribute of the generator node.
        dw: depthwise flag; when non-zero the reference output is reordered
            before the comparison.

    Returns:
        Tuple ``(x, y_produced, y_expected)``: the generated input, the
        generator output and the (possibly reordered) reference output.

    Raises:
        Exception: if ``exec_mode`` is neither ``"cppsim"`` nor ``"rtlsim"``.
        AssertionError: if the outputs differ, or (rtlsim only) if the cycle
            estimate is zero or not within 10 cycles of the measured value.

    Side effects:
        Writes several ``/tmp/test_convInputGenerator*.onnx`` files.
    """
    ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation)
    ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation)

    x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))

    model = make_single_slidingwindow_modelwrapper(
        k_h=k_h, k_w=k_w, ifm_ch=ifm_ch, ifm_dim_h=ifm_dim_h, ifm_dim_w=ifm_dim_w,
        ofm_dim_h=ofm_dim_h, ofm_dim_w=ofm_dim_w, simd=simd, stride_h=stride_h,
        stride_w=stride_w, dilation=dilation, idt=idt, dw=dw
    )

    model.save("/tmp/test_convInputGenerator.onnx")

    if exec_mode == "cppsim":
        model = model.transform(SetExecMode("cppsim"))
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model.save("/tmp/test_convInputGenerator_compiled.onnx")
    elif exec_mode == "rtlsim":
        model = model.transform(SetExecMode("rtlsim"))
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(PrepareIP("xcu250-figd2104-2L-e", 5))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
        model.save("/tmp/test_convInputGenerator_compiled_rtlsim.onnx")
    else:
        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")

    # prepare input data
    input_dict = prepare_inputs(x)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    golden = make_single_im2col_modelwrapper(
        k_h=k_h, k_w=k_w, ifm_ch=ifm_ch, ifm_dim_h=ifm_dim_h, ifm_dim_w=ifm_dim_w,
        ofm_dim_h=ofm_dim_h, ofm_dim_w=ofm_dim_w, simd=simd, stride_h=stride_h,
        stride_w=stride_w, dilation=dilation, idt=idt
    )
    golden.save("/tmp/test_convInputGenerator_im2col.onnx")
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    if dw != 0:
        # Depthwise mode: swap the kernel-position axis with the channel-fold
        # axis of the reference so it matches the generator's output order.
        y_expected = y_expected.reshape(
            1, ofm_dim_h, ofm_dim_w, k_h * k_w, ifm_ch // simd, simd
        )
        y_expected = y_expected.transpose(0, 1, 2, 4, 3, 5)
        y_expected = y_expected.reshape(1, ofm_dim_h, ofm_dim_w, ifm_ch * k_h * k_w)

    assert (y_produced == y_expected).all()

    # The rtlsim checks must run BEFORE returning; previously both branches of
    # the dw comparison returned early, which left this section unreachable.
    if exec_mode == "rtlsim":
        node = model.get_nodes_by_op_type("ConvolutionInputGenerator1D")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        print("---------------\n{}\t{}".format(cycles_rtlsim, exp_cycles))
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0

        exp_resources = model.analysis(res_estimation)
        hls_resources = model.analysis(hls_synth_res_estimation)
        print("---------------\nExp:{}\nHLS:{}".format(exp_resources, hls_resources))

    return x, y_produced, y_expected
        

In [5]:
# Layout of every config list in this cell:
#   [IH, IW, KH, KW, SH, SW, ICH, D, exec_mode, SIMD, DW]
# i.e. input height/width, kernel height/width, stride height/width,
# input channels, dilation, execution mode, SIMD and depthwise flag.
# The trailing comments record the outcome observed for each configuration.

# Failing variants get their own names so they are not silently overwritten
# by the working configuration defined right after them.
config1_fails = [288, 1, 33, 1, 2, 2, 64, 1, "cppsim", 1, 1]  # fails
config1_T = [1, 288, 1, 33, 1, 2, 64, 1, "cppsim", 1, 1]  # fails
# The one that works:
config1 = [1,   288, 1,  33, 2, 2, 64, 1, "cppsim", 1, 1] # works ConvolutionInputGenerator_NonSquare_dws

config2 = [160, 1, 33, 1, 1, 1, 256, 1, "rtlsim", 1, 1] # works ConvolutionInputGenerator_NonSquare_dws
config2_T = [1, 160, 1, 33, 1, 1, 256, 1, "cppsim", 1, 1] # works ConvolutionInputGenerator_NonSquare_dws

config3 = [166, 1, 39, 1, 1, 1, 256, 1, "cppsim", 1, 1] # works ConvolutionInputGenerator_NonSquare_dws
config3_T = [1, 166, 1, 39, 1, 1, 256, 1, "cppsim", 1, 1] # works ConvolutionInputGenerator_NonSquare_dws

config4 = [178, 1, 51, 1, 1, 1, 256, 1, "cppsim", 1, 1] # works ConvolutionInputGenerator_NonSquare_dws
config4_T = [1, 178, 1, 51, 1, 1, 256, 1, "cppsim", 1, 1] # works ConvolutionInputGenerator_NonSquare_dws

config5 = [178, 1, 51, 1, 1, 1, 512, 1, "cppsim", 1, 1] # works ConvolutionInputGenerator_NonSquare_dws
config5_T = [1, 178, 1, 51, 1, 1, 512, 1, "cppsim", 1, 1] # works ConvolutionInputGenerator_NonSquare_dws

config6 = [190, 1, 63, 1, 1, 1, 512, 1, "cppsim", 1, 1] # works ConvolutionInputGenerator_NonSquare_dws
config6_T = [1, 190, 1, 63, 1, 1, 512, 1, "cppsim", 1, 1] # works ConvolutionInputGenerator_NonSquare_dws

config7 = [202, 1, 75, 1, 1, 1, 512, 1, "cppsim", 1, 1] # works ConvolutionInputGenerator_NonSquare_dws
config7_T = [1, 202, 1, 75, 1, 1, 512, 1, "cppsim", 1, 1] # works ConvolutionInputGenerator_NonSquare_dws

config8_fails = [300, 1, 87, 1, 1, 1, 512, 2, "cppsim", 1, 1] # fails ConvolutionInputGenerator_NonSquare_Dilated
config8_T = [1, 300, 1, 87, 1, 1, 512, 2, "cppsim", 1, 1] # fails ConvolutionInputGenerator_NonSquare_Dilated
# The one that works:
config8 = [1, 300, 1, 87, 1, 1, 512, 2, "cppsim", 1, 1] # works ConvolutionInputGenerator_NonSquare_dws

##### After changing [H, W] -> [X, Y] assumption
# Configuration that is actually executed by the call below.
# Alternatives tried earlier:
#   [10, 1, 2, 1, 1, 1, 3, 1, "cppsim", 1, 1]
#   [288, 1, 33, 1, 2, 1, 64, 1, "cppsim", 1, 1]
config = [160, 1, 33, 1, 1, 1, 256, 1, 'cppsim', 1, 1]

# Keyword arguments make the list-position -> parameter mapping explicit,
# since the list order (IH, IW, KH, KW, ...) differs from the parameter order.
x, y_produced, y_expected = test_fpgadataflow_slidingwindow(
    DataType.INT8,
    k_h=config[2],
    k_w=config[3],
    ifm_dim_h=config[0],
    ifm_dim_w=config[1],
    ifm_ch=config[6],
    stride_h=config[4],
    stride_w=config[5],
    dilation=config[7],
    exec_mode=config[8],
    simd=config[9],
    dw=config[10],
)



In [None]:
# Repeat, at notebook level, the element-wise comparison that
# test_fpgadataflow_slidingwindow already asserts internally.
assert (y_produced == y_expected).all()

In [24]:
# Show the input tensor returned by the test run (generated with gen_finn_dt_tensor).
# NOTE(review): the saved output of this cell has shape (1, 2, 2, 2), which does not
# match the active `config` — it looks captured from an earlier run; re-run to refresh.
print(x)

[[[[-66.  -6.]
   [ 62. 103.]]

  [[ 50. -73.]
   [ 94.  75.]]]]


In [31]:
# Shape and contents of the reference output (Im2Col model, reordered when dw != 0).
print(y_expected.shape)
print(y_expected)

(1, 4, 2, 12)
[[[[ -11.  -50.  -98.  -91.  -95.  -90. -121.   80.  -40. -105.  -99.
     -27.]
   [ -95.  -90.  -78.  -63.  -64.  -83.  -99.  -27.  111.  125.  110.
    -109.]]

  [[  70. -116. -123.   30.  -85.  -30. -114.   27.  100.  100.   20.
      23.]
   [ -85.  -30.  -82.  106.  110.  -29.   20.   23.  -59. -106.   67.
     -61.]]

  [[ -13.    6.  -27.    0.  110. -122.  -36. -127.   26.  112.  -46.
       0.]
   [ 110. -122.  -55. -104.   21.   -7.  -46.    0.  -57.  123.   50.
      88.]]

  [[ 109.   37. -110.  -44. -110. -124.  -43.   82.  -95.   -3.   57.
      11.]
   [-110. -124.  -58. -109.   85.   -3.   57.   11.   53.  -31.   23.
     119.]]]]


In [32]:
# Shape and contents of the output produced by the ConvolutionInputGenerator1D model.
print(y_produced.shape)
print(y_produced)

(1, 4, 2, 12)
[[[[ -11.  -50.  -98.  -91.  -95.  -90.  -99.  -27.  111.  125.  110.
    -109.]
   [ -95.  -90.  -78.  -63.  -64.  -83.  110. -109.   25. -115.   70.
    -116.]]

  [[  70. -116. -123.   30.  -85.  -30.   20.   23.  -59. -106.   67.
     -61.]
   [ -85.  -30.  -82.  106.  110.  -29.   67.  -61.  -30.  105.   56.
    -107.]]

  [[ -13.    6.  -27.    0.  110. -122.  -46.    0.  -57.  123.   50.
      88.]
   [ 110. -122.  -55. -104.   21.   -7.   50.   88. -113.  -35.  109.
      37.]]

  [[ 109.   37. -110.  -44. -110. -124.   57.   11.   53.  -31.   23.
     119.]
   [-110. -124.  -58. -109.   85.   -3.   23.  119.  -17.    4.   56.
    -107.]]]]


In [10]:
from finn.util.visualization import showInNetron

# Visualize the model saved after the rtlsim preparation steps.
# This file is only written when the test runs with exec_mode == "rtlsim",
# so with a cppsim config it may be missing or left over from an earlier run.
showInNetron("/tmp/test_convInputGenerator_compiled_rtlsim.onnx")

OSError: [Errno 98] Address already in use

In [21]:
from finn.util.visualization import showInNetron

# Alternative: view the model as saved before any exec-mode transformation.
#showInNetron("/tmp/test_convInputGenerator.onnx")

# Visualize the model saved after the cppsim compile step (exec_mode == "cppsim").
showInNetron("/tmp/test_convInputGenerator_compiled.onnx")


Stopping http://0.0.0.0:8081
Serving '/tmp/test_convInputGenerator_compiled.onnx' at http://0.0.0.0:8081


In [22]:
# Visualize the Im2Col golden-reference model saved by the test function.
showInNetron("/tmp/test_convInputGenerator_im2col.onnx")

Stopping http://0.0.0.0:8081
Serving '/tmp/test_convInputGenerator_im2col.onnx' at http://0.0.0.0:8081
