In [11]:
import numpy as np

arr = np.asarray([
    [1,2,3,4,5],
    [6,7,8,9,10],
    [11,12,13,14,15],
    [16,17,18,19,20],
    [21,22,23,24,25],
    [26,27,28,29,30]
])

arr

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25],
       [26, 27, 28, 29, 30]])

In [14]:
from qonnx.util.basic import interleave_matrix_outer_dim_from_partitions

pe = 2

arr_modified = interleave_matrix_outer_dim_from_partitions(arr, pe)

arr_modified = arr_modified.reshape(1, pe, int(5*6/pe), 1)

arr_modified


array([[[[ 1.],
         [ 2.],
         [ 3.],
         [ 4.],
         [ 5.],
         [11.],
         [12.],
         [13.],
         [14.],
         [15.],
         [21.],
         [22.],
         [23.],
         [24.],
         [25.]],

        [[ 6.],
         [ 7.],
         [ 8.],
         [ 9.],
         [10.],
         [16.],
         [17.],
         [18.],
         [19.],
         [20.],
         [26.],
         [27.],
         [28.],
         [29.],
         [30.]]]], dtype=float32)

In [44]:
import numpy as np
import onnx
import onnx.helper as oh
import onnx.numpy_helper as np_helper
from onnx import TensorProto
from pkgutil import get_data

import qonnx.core.onnx_exec as oxe
from qonnx.core.datatype import DataType
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.custom_op.general.im2col import compute_conv_output_dim
from qonnx.custom_op.registry import getCustomOp
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model

ifm_dim_h = 4
ifm_dim_w = 4
k_h = 2
k_w = 2
ifm_ch = 3
idt = DataType["INT4"]
wdt = DataType["INT4"]
odt = DataType["INT32"]
ofm_ch = 3
pad_h = 0
pad_w = 0
stride_h = 1
stride_w = 1
dilations = [1, 1]
padding = [0, 0, 0, 0]

ofm_dim_h = compute_conv_output_dim(
    ifm_dim_h,
    k_h,
    stride_h,
    pad_h,
    dilations[0],
)
ofm_dim_w = compute_conv_output_dim(
    ifm_dim_w,
    k_w,
    stride_w,
    pad_w,
    dilations[1],
)

# set up onnx model
inp = oh.make_tensor_value_info("inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim_h, ifm_dim_w])
outp = oh.make_tensor_value_info("outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim_h, ofm_dim_w])

W = oh.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, 1, k_h, k_w])
group = ifm_ch

dw_cnv = oh.make_node(
    "Conv",
    inputs=["inp", "W"],
    outputs=["outp"],
    kernel_shape=[k_h, k_w],
    pads=padding,
    strides=[stride_h, stride_w],
    group=group,
    dilations=dilations,
)
graph = oh.make_graph(
    nodes=[dw_cnv],
    name="dw_cnv_graph",
    inputs=[inp],
    outputs=[outp],
    value_info=[W],
)

model = qonnx_make_model(graph, producer_name="test_dws_reg_cnv-model")
model = ModelWrapper(model)
model.set_tensor_datatype("inp", idt)
model.set_tensor_datatype("outp", odt)
model.set_tensor_datatype("W", wdt)

model.save("conv.onnx")

w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, 1, k_h, k_w])

model.set_initializer("W", w_tensor)
model = model.transform(InferShapes())

input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim_h, ifm_dim_w])
input_dict = {"inp": input_tensor}
output_dict = oxe.execute_onnx(model, input_dict)
expected = output_dict["outp"]

model = model.transform(LowerConvsToMatMul())
model.save("vvu_model.onnx")
#assert len(model.get_nodes_by_op_type("Conv")) == 0, "Found Conv nodes after lowering"
output_dict = oxe.execute_onnx(model, input_dict, True)
#produced = output_dict["outp"]
#assert (produced == expected).all()

import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls

model = model.transform(to_hls.InferConvInpGen())
model = model.transform(to_hls.InferVectorVectorActivation())

for n in model.graph.node:
    if n.op_type=="ConvolutionInputGenerator":
        getCustomOp(n).set_nodeattr("SIMD", 1)
    if n.op_type=="VectorVectorActivation":
        getCustomOp(n).set_nodeattr("PE", 1)

model.save("vvu_hls.onnx")

from qonnx.transformation.general import GiveUniqueNodeNames
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode

model = model.transform(SetExecMode("rtlsim"))
model = model.transform(GiveUniqueNodeNames())
model = model.transform(PrepareIP("xc7z020clg400-1", 5))
model = model.transform(HLSSynthIP())
model = model.transform(PrepareRTLSim())

output_dict_hls = oxe.execute_onnx(model, input_dict, True)

# check if created nodes have attributes that indicate depthwise conv
#assert model.get_tensor_sparsity("W") is not None
#im2col_node = getCustomOp(model.graph.node[1])
#assert im2col_node.get_nodeattr("depthwise") == 1

                                                                                                                       : ... In instance VectorVectorActivation_0.flow_control_loop_pipe_no_ap_cont_U
  868 | #0 ap_loop_init = 1'b1;
      |  ^
                  ... Use "/* verilator lint_off STMTDLY */" and lint_on around source to disable this message.
                                                                                                                       : ... In instance VectorVectorActivation_0.flow_control_loop_pipe_no_ap_cont_U
  869 | #0 ap_done_cache = 1'b0;
      |  ^
                                                                                                                       : ... In instance VectorVectorActivation_0
  196 | #0 ap_CS_iter0_fsm = 1'd1;
      |  ^
                                                                                                                       : ... In instance VectorVectorActivation_0
  197 | #0 ap_CS_iter1_fsm = 2'd1;


                                                                                                                            : ... In instance ConvolutionInputGenerator_0.grp_ConvolutionInputGenerator_dws_2u_3u_4u_4u_3u_1u_1u_ap_resource_lutram_s_fu_28.flow_control_loop_pipe_sequential_init_U
   94 | #0 ap_loop_init_int = 1'b1;
      |  ^
                  ... Use "/* verilator lint_off STMTDLY */" and lint_on around source to disable this message.
                                                                                                                            : ... In instance ConvolutionInputGenerator_0.grp_ConvolutionInputGenerator_dws_2u_3u_4u_4u_3u_1u_1u_ap_resource_lutram_s_fu_28.flow_control_loop_pipe_sequential_init_U
   95 | #0 ap_done_cache = 1'b0;
      |  ^
                                                                                                                             : ... In instance ConvolutionInputGenerator_0.grp_ConvolutionInputGenerator_dws_2u_3u_

make: Entering directory '/scratch/mirzam/build_files/pyverilator_VectorVectorActivation_0_8vzzd2ws'
ccache g++  -I.  -MMD -I/usr/local/share/verilator/include -I/usr/local/share/verilator/include/vltstd -DVM_COVERAGE=0 -DVM_SC=0 -DVM_TRACE=1 -DVM_TRACE_FST=0 -DVM_TRACE_VCD=1 -faligned-new -Wno-bool-operation -Wno-sign-compare -Wno-uninitialized -Wno-unused-but-set-variable -Wno-unused-parameter -Wno-unused-variable -Wno-shadow     -fPIC --std=c++11  -std=gnu++14 -Os -c -o pyverilator_wrapper.o /scratch/mirzam/build_files/pyverilator_VectorVectorActivation_0_8vzzd2ws/pyverilator_wrapper.cpp
ccache g++  -I.  -MMD -I/usr/local/share/verilator/include -I/usr/local/share/verilator/include/vltstd -DVM_COVERAGE=0 -DVM_SC=0 -DVM_TRACE=1 -DVM_TRACE_FST=0 -DVM_TRACE_VCD=1 -faligned-new -Wno-bool-operation -Wno-sign-compare -Wno-uninitialized -Wno-unused-but-set-variable -Wno-unused-parameter -Wno-unused-variable -Wno-shadow     -fPIC --std=c++11  -std=gnu++14 -Os -c -o verilated.o /usr/local/sh

In [45]:
output_dict_hls.keys()

dict_keys(['inp', 'outp', 'ZJnSjC', 'W', '451imY', 'QF2fDY'])

In [46]:
output_dict_hls["ZJnSjC"]

array([[[[ 6., -5., -6.],
         [-6., -7., -1.],
         [-8.,  7., -3.],
         [ 6., -2.,  2.]],

        [[-4.,  4.,  6.],
         [ 4.,  5.,  6.],
         [-3., -4., -1.],
         [-7., -5., -5.]],

        [[-6., -6.,  1.],
         [ 6.,  2.,  3.],
         [-4.,  7.,  3.],
         [-6., -8.,  2.]],

        [[-3.,  0., -5.],
         [ 7.,  4.,  3.],
         [ 0.,  6., -7.],
         [-2., -1.,  2.]]]], dtype=float32)

In [47]:
output_dict_hls["451imY"]

array([[[[ 6., -6., -4.,  4., -5., -7.,  4.,  5., -6., -1.,  6.,  6.],
         [-6., -8.,  4., -3., -7.,  7.,  5., -4., -1., -3.,  6., -1.],
         [-8.,  6., -3., -7.,  7., -2., -4., -5., -3.,  2., -1., -5.]],

        [[-4.,  4., -6.,  6.,  4.,  5., -6.,  2.,  6.,  6.,  1.,  3.],
         [ 4., -3.,  6., -4.,  5., -4.,  2.,  7.,  6., -1.,  3.,  3.],
         [-3., -7., -4., -6., -4., -5.,  7., -8., -1., -5.,  3.,  2.]],

        [[-6.,  6., -3.,  7., -6.,  2.,  0.,  4.,  1.,  3., -5.,  3.],
         [ 6., -4.,  7.,  0.,  2.,  7.,  4.,  6.,  3.,  3.,  3., -7.],
         [-4., -6.,  0., -2.,  7., -8.,  6., -1.,  3.,  2., -7.,  2.]]]],
      dtype=float32)

In [35]:
print(output_dict.keys())

dict_keys(['inp', 'outp', 'W', 'ygTscg', 'k2u7xa', 'F71Z2F'])


In [36]:
print(output_dict["ygTscg"])

[[[[-6.  5.  4.]
   [ 6. -6. -7.]
   [ 2.  5. -7.]
   [ 7.  6.  5.]]

  [[ 0.  5. -4.]
   [-3. -8.  4.]
   [-2. -2. -2.]
   [-6. -2. -6.]]

  [[-8.  6.  3.]
   [ 7.  6.  4.]
   [ 7.  7. -8.]
   [-1.  4. -7.]]

  [[ 3.  1.  0.]
   [-4.  3. -7.]
   [-5.  2.  6.]
   [ 0. -5. -5.]]]]


In [37]:
print(output_dict["k2u7xa"])

[[[[-6.  5.  4.  6. -6. -7.  0.  5. -4. -3. -8.  4.]
   [ 6. -6. -7.  2.  5. -7. -3. -8.  4. -2. -2. -2.]
   [ 2.  5. -7.  7.  6.  5. -2. -2. -2. -6. -2. -6.]]

  [[ 0.  5. -4. -3. -8.  4. -8.  6.  3.  7.  6.  4.]
   [-3. -8.  4. -2. -2. -2.  7.  6.  4.  7.  7. -8.]
   [-2. -2. -2. -6. -2. -6.  7.  7. -8. -1.  4. -7.]]

  [[-8.  6.  3.  7.  6.  4.  3.  1.  0. -4.  3. -7.]
   [ 7.  6.  4.  7.  7. -8. -4.  3. -7. -5.  2.  6.]
   [ 7.  7. -8. -1.  4. -7. -5.  2.  6.  0. -5. -5.]]]]


# MMV SWG

In [10]:
from onnx import TensorProto, helper
from qonnx.core.datatype import DataType
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.custom_op.general.im2col import compute_conv_output_dim
from qonnx.transformation.general import GiveUniqueNodeNames
from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model

import finn.core.onnx_exec as oxe
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode


def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt):
    k_h, k_w = k
    ifm_dim_h, ifm_dim_w = ifm_dim
    stride_h, stride_w = stride
    dilation_h, dilation_w = dilation
    ofm_dim_h, ofm_dim_w = ofm_dim

    odt = idt
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch])
    outp = helper.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch]
    )

    im2col_node = helper.make_node(
        "Im2Col",
        ["inp"],
        ["outp"],
        domain="finn.custom_op.general",
        stride=[stride_h, stride_w],
        kernel_size=[k_h, k_w],
        input_shape=str((1, ifm_dim_h, ifm_dim_w, ifm_ch)),
        dilations=[dilation_h, dilation_w],
        pad_amount=[0, 0, 0, 0],
        pad_value=0,
    )
    graph = helper.make_graph(
        nodes=[im2col_node], name="im2col_graph", inputs=[inp], outputs=[outp]
    )

    model = qonnx_make_model(graph, producer_name="im2col-model")
    model = ModelWrapper(model)

    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype("outp", odt)

    return model


def make_single_slidingwindow_modelwrapper(
    k, ifm_ch, ifm_dim, ofm_dim, simd, m, parallel_window, stride, dilation, idt, dw=0
):
    k_h, k_w = k
    ifm_dim_h, ifm_dim_w = ifm_dim
    stride_h, stride_w = stride
    dilation_h, dilation_w = dilation
    ofm_dim_h, ofm_dim_w = ofm_dim

    odt = idt
    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch])
    outp = helper.make_tensor_value_info(
        "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch]
    )

    SlidingWindow_node = helper.make_node(
        "ConvolutionInputGenerator_rtl",
        ["inp"],
        ["outp"],
        domain="finn.custom_op.fpgadataflow",
        backend="fpgadataflow",
        ConvKernelDim=[k_h, k_w],
        IFMChannels=ifm_ch,
        IFMDim=[ifm_dim_h, ifm_dim_w],
        OFMDim=[ofm_dim_h, ofm_dim_w],
        SIMD=simd,
        M=m,
        parallel_window=parallel_window,
        Stride=[stride_h, stride_w],
        Dilation=[dilation_h, dilation_w],
        inputDataType=idt.name,
        outputDataType=odt.name,
        depthwise=dw,
    )
    graph = helper.make_graph(
        nodes=[SlidingWindow_node],
        name="slidingwindow_graph",
        inputs=[inp],
        outputs=[outp],
    )

    model = qonnx_make_model(graph, producer_name="slidingwindow-model")
    model = ModelWrapper(model)

    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype("outp", odt)

    return model


def prepare_inputs(input_tensor):
    return {"inp": input_tensor}


def test_fpgadataflow_slidingwindow_rtl(
    idt, k, ifm_dim, ifm_ch, stride, dilation, dw, simd, m, parallel_window, flip
):
    if flip:
        if (
            ifm_dim[0] == ifm_dim[1]
            and k[0] == k[1]
            and stride[0] == stride[1]
            and dilation[0] == dilation[1]
        ):
            pytest.skip("Dimension flip would have no effect")
        k = k[::-1]
        ifm_dim = ifm_dim[::-1]
        stride = stride[::-1]
        dilation = dilation[::-1]

    k_h, k_w = k
    ifm_dim_h, ifm_dim_w = ifm_dim
    stride_h, stride_w = stride
    dilation_h, dilation_w = dilation

    kernel_width = (k_w - 1) * dilation_w + 1  # incl. dilation
    kernel_height = (k_h - 1) * dilation_h + 1  # incl. dilation

    ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h)
    ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w)
    ofm_dim = [ofm_dim_h, ofm_dim_w]

    x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
    model = make_single_slidingwindow_modelwrapper(
        k=k,
        ifm_ch=ifm_ch,
        ifm_dim=ifm_dim,
        ofm_dim=ofm_dim,
        simd=simd,
        m=m,
        parallel_window=parallel_window,
        stride=stride,
        dilation=dilation,
        idt=idt,
        dw=dw,
    )

    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(PrepareIP("xc7z020clg400-1", 5))
    model = model.transform(PrepareRTLSim())
    
    model.save("swg.onnx")

    # prepare input data
    input_dict = prepare_inputs(x)
    # execute model
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    golden = make_single_im2col_modelwrapper(
        k=k,
        ifm_ch=ifm_ch,
        ifm_dim=ifm_dim,
        ofm_dim=ofm_dim,
        stride=stride,
        dilation=dilation,
        idt=idt,
    )
    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]

    if dw == 0:
        assert (y_produced == y_expected).all()
    else:
        y_expected = y_expected.reshape(1, ofm_dim_h, ofm_dim_w, k_h * k_w, ifm_ch // simd, simd)
        y_expected = y_expected.transpose(0, 1, 2, 4, 3, 5)
        y_expected = y_expected.reshape(1, ofm_dim_h, ofm_dim_w, ifm_ch * k_h * k_w)
        #assert (y_produced == y_expected).all()
        
    return input_dict, y_produced, y_expected


In [20]:
idt = DataType["UINT4"]
k = [2,2]
ifm_dim= [3,3]
ifm_ch = 4
stride = [1,1]
dilation = [1,1]
dw = 1
simd = 4
parallel_window = 1
m = 2
flip = False

in_dict, y_produced, y_expected = test_fpgadataflow_slidingwindow_rtl(idt, k, ifm_dim, ifm_ch, stride, dilation, dw, simd, parallel_window, m, flip)

AssertionError: parallel_window = 2 not in {0, 1}

SIMD=4, MMV=1

In [16]:
in_dict['inp']

array([[[[ 4., 12.,  7.,  8.],
         [12., 11.,  8.,  4.],
         [ 8., 13., 15., 15.]],

        [[10., 12., 14.,  0.],
         [13., 14., 14., 11.],
         [ 3.,  7.,  1.,  6.]],

        [[ 5.,  9.,  6.,  0.],
         [13., 10.,  0.,  7.],
         [ 6., 12.,  5.,  4.]]]], dtype=float32)

In [13]:
y_produced

array([[[[ 0.,  0.,  0.,  0.,  4., 12.,  7.,  8., 10., 12., 14.,  0.,
          13., 14., 14., 11.],
         [ 4., 12.,  7.,  8., 12., 11.,  8.,  4., 13., 14., 14., 11.,
           3.,  7.,  1.,  6.]],

        [[ 8., 13., 15., 15., 10., 12., 14.,  0.,  5.,  9.,  6.,  0.,
          13., 10.,  0.,  7.],
         [10., 12., 14.,  0., 13., 14., 14., 11., 13., 10.,  0.,  7.,
           6., 12.,  5.,  4.]]]], dtype=float32)

In [19]:
y_expected

array([[[[ 4., 12.,  7.,  8., 12., 11.,  8.,  4., 10., 12., 14.,  0.,
          13., 14., 14., 11.],
         [12., 11.,  8.,  4.,  8., 13., 15., 15., 13., 14., 14., 11.,
           3.,  7.,  1.,  6.]],

        [[10., 12., 14.,  0., 13., 14., 14., 11.,  5.,  9.,  6.,  0.,
          13., 10.,  0.,  7.],
         [13., 14., 14., 11.,  3.,  7.,  1.,  6., 13., 10.,  0.,  7.,
           6., 12.,  5.,  4.]]]], dtype=float32)

SIMD=4, MMV=2