In [1]:
import os
import numpy as np

from onnx import TensorProto, helper
from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
from finn.util.basic import gen_finn_dt_tensor
import finn.core.onnx_exec as oxe
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.general import GiveUniqueNodeNames
from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.custom_op.registry import getCustomOp
from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer

from finn.util.basic import pynq_part_map

# Board used to look up the FPGA part; the PYNQ_BOARD environment variable
# overrides the "Pynq-Z1" fallback.
test_pynq_board = os.environ.get("PYNQ_BOARD", "Pynq-Z1")
test_fpga_part = pynq_part_map[test_pynq_board]
# Target clock value handed to PrepareIP in the rtlsim flow
# (nanoseconds, going by the name).
target_clk_ns = 10


def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt, pad_style):
    """Build a one-node ``FMPadding_Batch`` graph wrapped in a ``ModelWrapper``.

    The graph has a single input ``"inp"`` of shape
    ``[1, idim[0], idim[1], num_ch]`` and a single output ``"outp"`` whose two
    middle dimensions are enlarged by ``padding[0] + padding[2]`` and
    ``padding[1] + padding[3]`` respectively.

    Args:
        idim: two-element sequence with the input image dimensions; also
            stored unchanged in the node's ``ImgDim`` attribute.
        padding: four-element sequence, stored unchanged in the node's
            ``Padding`` attribute.
        num_ch: size of the last tensor axis (``NumChannels`` attribute).
        simd: value of the ``SIMD`` attribute.
        idt: FINN datatype; its ``name`` becomes ``inputDataType`` and it is
            set as the datatype of both tensors.
        pad_style: value of the ``PaddingStyle`` attribute; must be 2.

    Returns:
        The ``ModelWrapper`` around the generated model.

    Raises:
        AssertionError: if ``pad_style`` is not 2 or neither summed padding
            is positive.

    Side effects:
        Saves the model to ``/tmp/test_fmpadding.onnx``.
    """
    total_pad_h = padding[0] + padding[2]
    total_pad_w = padding[1] + padding[3]
    in_h, in_w = idim[0], idim[1]

    assert pad_style == 2, "only pad_style == 2 supported in hlslib"
    assert (
        total_pad_h > 0 or total_pad_w > 0
    ), "Output dim should be greater than input dim"

    # Tensor shapes: only the two middle axes grow.
    in_shape = [1, in_h, in_w, num_ch]
    out_shape = [1, in_h + total_pad_h, in_w + total_pad_w, num_ch]
    in_info = helper.make_tensor_value_info("inp", TensorProto.FLOAT, in_shape)
    out_info = helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_shape)

    node_attrs = {
        "backend": "fpgadataflow",
        "ImgDim": idim,
        "Padding": padding,
        "NumChannels": num_ch,
        "inputDataType": str(idt.name),
        "PaddingStyle": pad_style,
        "numInputVectors": 1,
        "SIMD": simd,
    }
    pad_node = helper.make_node(
        "FMPadding_Batch",
        ["inp"],
        ["outp"],
        domain="finn.custom_op.fpgadataflow",
        **node_attrs,
    )

    pad_graph = helper.make_graph(
        nodes=[pad_node],
        name="fmpadding_graph",
        inputs=[in_info],
        outputs=[out_info],
    )
    wrapped = ModelWrapper(
        helper.make_model(pad_graph, producer_name="fmpadding-model")
    )

    # Input and output carry the same FINN datatype.
    for tensor_name in ("inp", "outp"):
        wrapped.set_tensor_datatype(tensor_name, idt)

    wrapped.save("/tmp/test_fmpadding.onnx")
    return wrapped


def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode):
    if num_ch % simd != 0:
        pytest.skip(" num_ch % simd != 0, skipping")
        
    idim_h = idim[0]
    idim_w = idim[1]
    pad_h = pad[0]+pad[2]
    pad_w = pad[1]+pad[3]
    # generate input data
    x = gen_finn_dt_tensor(idt, [1, idim_h, idim_w, num_ch])
    input_dict = {"inp": x}
    odim_h = idim_h + pad_h
    odim_w = idim_w + pad_w

    model = make_single_fmpadding_modelwrapper(idim, pad, num_ch, simd, idt, pad_style)
    model = model.transform(InferShapes())
    model = model.transform(SetExecMode(mode))
    model = model.transform(GiveUniqueNodeNames())
    if mode == "cppsim":
        model = model.transform(PrepareCppSim())
        model = model.transform(CompileCppSim())
        model.save("/tmp/test_fmpadding_compiled.onnx")
    elif mode == "rtlsim":
        model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
    expected_oshape = (1, odim_h, odim_w, num_ch)
    assert y_produced.shape == expected_oshape

    # calculate reference
    # calculate correct pad according to parameters
    if pad_style == 2:
        if pad_h % 2 == 0:
            pad_up = pad_h // 2
        else:
            pad_up = pad_h // 2 + 1
        if pad_w % 2 == 0:
            pad_left = pad_w // 2
        else:
            pad_left = pad_w // 2 + 1
    else:
        pad_up = pad_h // 2
        pad_left = pad_w // 2

    pad_down = pad_h - pad_up
    pad_right = pad_w - pad_left

    y_expected = np.pad(
        x, ((0, 0), (pad_up, pad_down), (pad_left, pad_right), (0, 0)), "constant"
    )

    assert (y_produced == y_expected).all()

    if mode == "rtlsim":
        node = model.get_nodes_by_op_type("FMPadding_Batch")[0]
        inst = getCustomOp(node)
        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
        exp_cycles = exp_cycles_dict[node.name]
        print("Exp: {}\nRTL_sim: {}".format(exp_cycles, cycles_rtlsim))
        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
        assert exp_cycles != 0

In [2]:
# Candidate values for each test parameter; a single combination is picked
# by index in the call at the bottom of this cell.

# Input image dimensions, one [dim0, dim1] pair per candidate.
idim = [[8, 8]]
# Padding, four values per candidate. The functions above use
# pad[0] + pad[2] as the total for the first image axis and
# pad[1] + pad[3] as the total for the second.
pad = [[1, 1, 1, 1], [1, 1, 2, 2]]
# Number of channels.
num_ch = [2, 4]
# Input parallelism (SIMD); must divide the number of channels.
simd = [1, 2]
# PaddingStyle: selects behavior when (odim-idim)%2 != 0.
pad_style = [2]
# FINN input datatype.
idt = [DataType.INT2, DataType.INT4]
# Execution mode.
mode = ["cppsim", "rtlsim"]

test_fpgadataflow_fmpadding(
    idim=idim[0],
    pad=pad[0],
    num_ch=num_ch[0],
    simd=simd[0],
    pad_style=pad_style[0],
    idt=idt[0],
    mode=mode[1],
)



make: Entering directory '/tmp/finn_dev_mirza/pyverilator_FMPadding_Batch_0_nexofxdf'
g++  -I.  -MMD -I/usr/share/verilator/include -I/usr/share/verilator/include/vltstd -DVL_PRINTF=printf -DVM_TRACE=1 -DVM_COVERAGE=0 -Wno-char-subscripts -Wno-parentheses-equality -Wno-sign-compare -Wno-uninitialized -Wno-unused-but-set-variable -Wno-unused-parameter -Wno-unused-variable     -fPIC --std=c++11   -c -o pyverilator_wrapper.o /tmp/finn_dev_mirza/pyverilator_FMPadding_Batch_0_nexofxdf/pyverilator_wrapper.cpp
g++  -I.  -MMD -I/usr/share/verilator/include -I/usr/share/verilator/include/vltstd -DVL_PRINTF=printf -DVM_TRACE=1 -DVM_COVERAGE=0 -Wno-char-subscripts -Wno-parentheses-equality -Wno-sign-compare -Wno-uninitialized -Wno-unused-but-set-variable -Wno-unused-parameter -Wno-unused-variable     -fPIC --std=c++11   -c -o verilated.o /usr/share/verilator/include/verilated.cpp
g++  -I.  -MMD -I/usr/share/verilator/include -I/usr/share/verilator/include/vltstd -DVL_PRINTF=printf -DVM_TRACE=1 -D

ar: creating VFMPadding_Batch_0_FMPadding_Batch_0__ALL.a


Exp:200
RTL_sim205


In [4]:
# Open the model that make_single_fmpadding_modelwrapper saved to
# /tmp/test_fmpadding.onnx in the Netron viewer.
from finn.util.visualization import showInNetron

showInNetron("/tmp/test_fmpadding.onnx")

Serving '/tmp/test_fmpadding.onnx' at http://0.0.0.0:8081


In [5]:
# NOTE(review): this file is only written by test_fpgadataflow_fmpadding when
# mode == "cppsim"; the call in the earlier cell uses mode[1] ("rtlsim"), so
# on a fresh kernel the file may be missing or left over from a previous
# run. Confirm before relying on this view.
showInNetron("/tmp/test_fmpadding_compiled.onnx")

Stopping http://0.0.0.0:8081
Serving '/tmp/test_fmpadding_compiled.onnx' at http://0.0.0.0:8081
