In [1]:
from finn.util.visualization import showInNetron
import onnx
from finn.util.test import get_test_model_trained
import brevitas.onnx as bo
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs

build_dir = "builds"

model = ModelWrapper("classifier_1.onnx")
model = model.transform(InferShapes())
model = model.transform(FoldConstants())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(RemoveStaticGraphInputs())
#model = model.transform(Change3DTo4DTensors())
model.save(build_dir + "/end2end_sentiment_tidy.onnx")

In [2]:
showInNetron(build_dir + "/end2end_sentiment_tidy.onnx")

Serving 'builds/end2end_sentiment_tidy.onnx' at http://0.0.0.0:8081


In [3]:
from finn.transformation.streamline import Streamline
from finn.transformation.change_3d_tensors_to_4d import Change3DTo4DTensors
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants
from finn.transformation.infer_data_layouts import InferDataLayouts
from finn.transformation.general import RemoveUnusedTensors

model = ModelWrapper(build_dir + "/end2end_sentiment_tidy.onnx")
model = model.transform(Change3DTo4DTensors())
model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold())
model.save(build_dir + "/end2end_sentiment_streamlined.onnx")
model = model.transform(MoveScalarLinearPastInvariants())
model = model.transform(Streamline())
model = model.transform(LowerConvsToMatMul())
model = model.transform(MakeMaxPoolNHWC())
model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
model = model.transform(ConvertBipolarMatMulToXnorPopcount())
model = model.transform(GiveUniqueNodeNames())
model.save(build_dir + "/end2end_sentiment_streamlined.onnx")


model = model.transform(Streamline())
# absorb final add-mul nodes into TopK
model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
model = model.transform(InferDataLayouts())
model = model.transform(RemoveUnusedTensors())
model.save(build_dir + "/end2end_sentiment_streamlined.onnx")




In [4]:
showInNetron(build_dir + "/end2end_sentiment_streamlined.onnx")

Stopping http://0.0.0.0:8081
Serving 'builds/end2end_sentiment_streamlined.onnx' at http://0.0.0.0:8081


In [5]:
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
model = model.transform(to_hls.InferChannelwiseLinearLayer())
model = model.transform(to_hls.InferLabelSelectLayer())
model = model.transform(GiveUniqueNodeNames())
model.save(build_dir + "/end2end_sentiment_post_streamlined.onnx")



In [6]:
showInNetron(build_dir + "/end2end_sentiment_post_streamlined.onnx")

Stopping http://0.0.0.0:8081
Serving 'builds/end2end_sentiment_post_streamlined.onnx' at http://0.0.0.0:8081


In [7]:
import numpy as np
import warnings
from onnx import TensorProto, helper

import finn.core.data_layout as DataLayout
from finn.core.datatype import DataType
from finn.custom_op.registry import getCustomOp
from finn.transformation.base import Transformation
from finn.transformation.fpgadataflow.minimize_accumulator_width import (
    MinimizeAccumulatorWidth,
)
from finn.transformation.general import SortGraph
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.infer_shapes import InferShapes
from finn.util.basic import get_by_name
from finn.util.onnx import nchw_to_nhwc

class InferConvInpGen(Transformation):
    """Convert Im2Col layers to ConvolutionInputGenerator layers."""

    def apply(self, model):
        graph = model.graph
        node_ind = 0
        graph_modified = False
        for n in graph.node:
            node_ind += 1
            if n.op_type == "Im2Col":
                i2c_input = n.input[0]
                i2c_output = n.output[0]
                i2c_in_shape = model.get_tensor_shape(i2c_input)
                i2c_out_shape = model.get_tensor_shape(i2c_output)
                dt = model.get_tensor_datatype(i2c_input)
                print(dt)
                if not dt.is_integer():
                    warnings.warn(
                        "%s : Input is not int. Can't infer ConvInpGen." % n.name
                    )
                    continue
                i2c_inst = getCustomOp(n)
                stride_h, stride_w = i2c_inst.get_nodeattr("stride")
                k_h, k_w = i2c_inst.get_nodeattr("kernel_size")
                pad_attr = i2c_inst.get_nodeattr("pad_amount")
                pad_h = pad_attr[0] + pad_attr[2]
                pad_w = pad_attr[1] + pad_attr[3]
                dilation_h, dilation_w = i2c_inst.get_nodeattr("dilations")
                # temporary checks until non-square conv support is finalized
                pad_val = i2c_inst.get_nodeattr("pad_value")
                depthwise = i2c_inst.get_nodeattr("depthwise")
                ifm_ch = i2c_in_shape[-1]
                ifm_dim_h = i2c_in_shape[1]
                ifm_dim_w = i2c_in_shape[2]
                ofm_dim_h = i2c_out_shape[1]
                ofm_dim_w = i2c_out_shape[2]

                # default params for ConvolutionInputGenerator
                ConvInpGen_node_idx = node_ind
                ConvInpGen_input = i2c_input
                ConvInpGen_idim_h = ifm_dim_h
                ConvInpGen_idim_w = ifm_dim_w

                if pad_h > 0 or pad_w > 0:
                    # if padding enabled, ensure pad_val supported by DataType
                    # assert dt.allowed(pad_val),"""FMPadding_Batch DataType
                    # must support pad_val"""
                    assert pad_val == 0, (
                        "%s : FMPadding_Batch doesn't currently support pad_val!= 0"
                        % n.name
                    )

                    odim_padding_h = ifm_dim_h + pad_h
                    odim_padding_w = ifm_dim_w + pad_w

                    padding_out = helper.make_tensor_value_info(
                        model.make_new_valueinfo_name(),
                        TensorProto.FLOAT,
                        (1, odim_padding_h, odim_padding_w, ifm_ch),
                    )
                    graph.value_info.append(padding_out)
                    padding_out = padding_out.name
                    model.set_tensor_datatype(padding_out, dt)

                    ConvInpGen_node_idx += 1
                    ConvInpGen_input = padding_out
                    ConvInpGen_idim_h = odim_padding_h
                    ConvInpGen_idim_w = odim_padding_w

                    padding_node = helper.make_node(
                        "FMPadding_Batch",
                        [i2c_input],
                        [padding_out],
                        domain="finn.custom_op.fpgadataflow",
                        backend="fpgadataflow",
                        ImgDim=[ifm_dim_h, ifm_dim_w],
                        Padding=pad_attr,
                        NumChannels=ifm_ch,
                        inputDataType=dt.name,
                        SIMD=ifm_ch,
                        name="FMPadding_Batch_" + n.name,
                    )
                    graph.node.insert(node_ind, padding_node)

                # Ensure that only supported HLS nodes are inserted
                is_square_image = ConvInpGen_idim_h == ConvInpGen_idim_w
                is_square_kernel = k_h == k_w
                is_kernel_pointwise = k_h == 1 and k_w == 1
                is_equal_stride = stride_h == stride_w
                is_1d_convolution = (k_h == 1 and k_w > 1 and ifm_dim_h == 1) or (
                    k_h > 1 and k_w == 1 and ifm_dim_w == 1
                )

                if (stride_h > 1 or stride_w > 1) and is_kernel_pointwise:
                    assert is_square_image, (
                        "%s : DownSampler currently only supports square input images."
                        % n.name
                    )
                    assert is_equal_stride, (
                        """%s : DownSampler currently only supports equal stride value
                        along different axes."""
                        % n.name
                    )
                    ConvInpGen_idim = ConvInpGen_idim_h
                    stride = stride_h
                    # create DownSampler node
                    ConvInpGen_node = helper.make_node(
                        "DownSampler",
                        [ConvInpGen_input],
                        [i2c_output],
                        domain="finn.custom_op.fpgadataflow",
                        backend="fpgadataflow",
                        ImgDim=ConvInpGen_idim,
                        NumChannels=ifm_ch,
                        SIMD=ifm_ch,
                        Stride=stride,
                        inputDataType=dt.name,
                        name="DownSampler_" + n.name,
                    )
                    graph.node.insert(ConvInpGen_node_idx, ConvInpGen_node)
                else:
                    # create equivalent ConvolutionInputGenerator node
                    if (
                        is_square_image and is_square_kernel
                    ):  # square images and square kernels
                        assert is_equal_stride, (
                            """%s: Non-equal strides along different axes is not supported
                            for (non-)square convolutions"""
                            % n.name
                        )
                        assert dilation_h == 1 and dilation_w == 1, (
                            """%s: Dilation value != 1 is not supported
                            for square convolutions"""
                            % n.name
                        )
                        ConvInpGen_node = helper.make_node(
                            "ConvolutionInputGenerator",
                            [ConvInpGen_input],
                            [i2c_output],
                            domain="finn.custom_op.fpgadataflow",
                            backend="fpgadataflow",
                            ConvKernelDim=[k_h, k_w],
                            IFMChannels=ifm_ch,
                            IFMDim=[ConvInpGen_idim_h, ConvInpGen_idim_w],
                            OFMDim=[ofm_dim_h, ofm_dim_w],
                            SIMD=ifm_ch,
                            Stride=[stride_h, stride_w],
                            Dilation=[dilation_h, dilation_w],
                            inputDataType=dt.name,
                            outputDataType=dt.name,
                            depthwise=depthwise,
                            name="ConvolutionInputGenerator_" + n.name,
                        )
                    else:  # non-square images and/or kernels
                        assert is_1d_convolution, (
                            "%s: ConvolutionInputGenerator1D works only for 1D convs"
                            % n.name
                        )
                        if dilation_h > 1 or dilation_w > 1:
                            assert stride_h == 1 and stride_w == 1, (
                                """%s: Stride value of greater than 1 is not supported for convolutions
                                with dilation value greater than 1"""
                                % n.name
                            )
                        ConvInpGen_node = helper.make_node(
                            "ConvolutionInputGenerator1D",
                            [ConvInpGen_input],
                            [i2c_output],
                            domain="finn.custom_op.fpgadataflow",
                            backend="fpgadataflow",
                            ConvKernelDim=[k_h, k_w],
                            IFMChannels=ifm_ch,
                            IFMDim=[ConvInpGen_idim_h, ConvInpGen_idim_w],
                            OFMDim=[ofm_dim_h, ofm_dim_w],
                            SIMD=ifm_ch,
                            Stride=[stride_h, stride_w],
                            Dilation=[dilation_h, dilation_w],
                            inputDataType=dt.name,
                            outputDataType=dt.name,
                            depthwise=depthwise,
                            name="ConvolutionInputGenerator1D_" + n.name,
                        )
                    graph.node.insert(ConvInpGen_node_idx, ConvInpGen_node)
                # remove old nodes
                graph.node.remove(n)
                graph_modified = True
        if graph_modified:
            model = model.transform(InferShapes())
            model = model.transform(InferDataTypes())
        return (model, graph_modified)
    
class InferQuantizedStreamingFCLayer(Transformation):
    """Convert MatMul layers with quantized inputs and weights to
    StreamingFCLayer_Batch layers. Any immediately following MultiThreshold
    layers will also be absorbed into the MVTU."""

    def __init__(self, mem_mode="const"):
        super().__init__()
        self.mem_mode = mem_mode

    def apply(self, model):
        graph = model.graph
        node_ind = 0
        graph_modified = False
        for n in graph.node:
            node_ind += 1
            if n.op_type == "MatMul" and model.get_tensor_sparsity(n.input[1]) is None:
                print(n)
                mm_input = n.input[0]
                mm_weight = n.input[1]
                mm_output = n.output[0]
                mm_in_shape = model.get_tensor_shape(mm_input)
                mm_out_shape = model.get_tensor_shape(mm_output)
                idt = model.get_tensor_datatype(mm_input)
                wdt = model.get_tensor_datatype(mm_weight)
                if idt.is_integer() and wdt.is_integer():
                    print("here")
                    mm_output = n.output[0]
                    W = model.get_initializer(mm_weight)
                    # extract weight shape, note that ONNX and finn-hlslib
                    # make different assumptions about dim order here
                    # ONNX assumes W has (in, out) shape
                    # finn-hlslib assumes W has (out, in) shape
                    mh = int(W.shape[1])
                    mw = int(W.shape[0])
                    # create node with no parallelization first
                    pe = 1
                    simd = 1
                    wmem = mw * mh // (pe * simd)
                    assert mw * mh == wmem * pe * simd, (
                        n.name
                        + """: Requirement (MW * MH) divisible by
                    (WMEM * PE * SIMD) is violated."""
                    )
                    # see if we have any following thresholds
                    consumer = model.find_consumer(mm_output)
                    if consumer is not None and consumer.op_type == "MultiThreshold":
                        # TODO ensure integer thresholds?
                        # create MVTU (i.e. including activation)
                        mt_output = consumer.output[0]
                        mt_out_shape = model.get_tensor_shape(mt_output)
                        mt_thres = consumer.input[1]
                        T = model.get_initializer(mt_thres)
                        assert T.shape[0] == 1 or T.shape[0] == mh, (
                            consumer.name
                            + """: First dimension of
                        thresholds neither 1 nor MH."""
                        )
                        odt = model.get_tensor_datatype(mt_output)
                        scale = getCustomOp(consumer).get_nodeattr("out_scale")
                        actval = getCustomOp(consumer).get_nodeattr("out_bias")
                        assert int(actval) == actval, (
                            consumer.name
                            + ": out_bias must be integer for HLS conversion."
                        )
                        actval = int(actval)
                        odt_is_bipolar = odt == DataType["BIPOLAR"]
                        bipolar_ok = (
                            odt_is_bipolar and (scale == 2.0) and (actval == -1)
                        )
                        assert scale == 1.0 or bipolar_ok, (
                            consumer.name
                            + ": out_scale=1 or bipolar output needed for conversion."
                        )
                        assert (not odt.signed()) or (actval < 0), (
                            consumer.name + ": Signed output requres actval < 0"
                        )
                        model.set_tensor_shape(mm_input, mm_in_shape)
                        model.set_tensor_shape(mt_output, mt_out_shape)
                        if bipolar_ok:
                            # remove bias for bipolar, since
                            # binary->bipolar is achieved by reinterpretation
                            actval = 0
                        # create and insert new StreamingFCLayer node
                        new_node = helper.make_node(
                            "StreamingFCLayer_Batch",
                            [mm_input, mm_weight, mt_thres],
                            [mt_output],
                            domain="finn.custom_op.fpgadataflow",
                            backend="fpgadataflow",
                            MW=mw,
                            MH=mh,
                            SIMD=simd,
                            PE=pe,
                            inputDataType=idt.name,
                            weightDataType=wdt.name,
                            outputDataType=odt.name,
                            ActVal=actval,
                            binaryXnorMode=0,
                            noActivation=0,
                            numInputVectors=list(mm_in_shape[:-1]),
                            mem_mode=self.mem_mode,
                            name="StreamingFCLayer_Batch_" + n.name,
                        )
                        graph.node.insert(node_ind, new_node)
                        # remove old nodes
                        graph.node.remove(n)
                        graph.node.remove(consumer)
                        graph_modified = True
                    else:
                        # no activation, matmul only
                        odt = model.get_tensor_datatype(mm_output)
                        model.set_tensor_shape(mm_input, mm_in_shape)
                        model.set_tensor_shape(mm_output, mm_out_shape)
                        # create and insert new StreamingFCLayer node
                        new_node = helper.make_node(
                            "StreamingFCLayer_Batch",
                            [mm_input, mm_weight],
                            [mm_output],
                            domain="finn.custom_op.fpgadataflow",
                            backend="fpgadataflow",
                            MW=mw,
                            MH=mh,
                            SIMD=simd,
                            PE=pe,
                            inputDataType=idt.name,
                            weightDataType=wdt.name,
                            outputDataType=odt.name,
                            ActVal=0,
                            binaryXnorMode=0,
                            noActivation=1,
                            numInputVectors=list(mm_in_shape[:-1]),
                            mem_mode=self.mem_mode,
                            name="StreamingFCLayer_Batch_" + n.name,
                        )
                        graph.node.insert(node_ind, new_node)
                        # remove old node
                        graph.node.remove(n)
                        graph_modified = True
        if graph_modified:
            model = model.transform(MinimizeAccumulatorWidth())
            model = model.transform(InferShapes())
            model = model.transform(InferDataTypes())
        return (model, graph_modified)

In [8]:
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
mem_mode = "decoupled"
# needed for bipolar MatMul layers
model = model.transform(to_hls.InferDuplicateStreamsLayer())
model = model.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode))
# needed for non-bipolar MatMul layers
model = model.transform(InferQuantizedStreamingFCLayer(mem_mode))
# TopK to LabelSelect
model.save(build_dir + "/end2end_sentiment_to_hls.onnx")
model = model.transform(to_hls.InferLabelSelectLayer())
# input quantization (if any) as standalone threshold
model = model.transform(to_hls.InferThresholdingLayer())
# needed for convolutions -- TODO always exec?
need_conv = len(model.get_nodes_by_op_type("Im2Col")) > 0
if need_conv:
    print("hello")
    model = model.transform(InferConvInpGen())
    model = model.transform(to_hls.InferStreamingMaxPool())
    model = model.transform(RemoveCNVtoFCFlatten())
# get rid of Tranpose -> Tranpose identity seq
model = model.transform(absorb.AbsorbConsecutiveTransposes())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(InferDataLayouts())
model.save(build_dir + "/end2end_sentiment_to_hls.onnx")

input: "Im2Col_0_out0"
input: "MatMul_0_param0"
output: "MatMul_0_out0"
name: "MatMul_0"
op_type: "MatMul"

here
input: "Reshape_0_out0"
input: "MatMul_1_param0"
output: "MatMul_1_out0"
name: "MatMul_1"
op_type: "MatMul"

here
input: "MultiThreshold_2_out0"
input: "MatMul_2_param0"
output: "MatMul_2_out0"
name: "MatMul_2"
op_type: "MatMul"

here
input: "MultiThreshold_3_out0"
input: "MatMul_3_param0"
output: "MatMul_3_out0"
name: "MatMul_3"
op_type: "MatMul"

here
hello
INT4


In [9]:
showInNetron(build_dir + "/end2end_sentiment_to_hls.onnx")

Stopping http://0.0.0.0:8081
Serving 'builds/end2end_sentiment_to_hls.onnx' at http://0.0.0.0:8081


In [12]:
from finn.core.modelwrapper import ModelWrapper
from finn.custom_op.registry import getCustomOp
from finn.transformation.base import Transformation
from finn.transformation.fpgadataflow.externalize_params import ExternalizeParams
from finn.util.basic import get_by_name, make_build_dir

import copy
import pathlib
from onnx import helper


class PartitionFromLambda(Transformation):
    """Split a graph into partitions. Each resulting partition node has a model
    attribute indicating the path to the subordinate onnx file.
    Cleanup and InferShapes() transformations should be applied first.
    Argument 0: partitioning
    * Function performing the mapping: node -> partition_id (int or string)
    * Partitions may not cover the graph completely (nodes mapped to -1 are retained)
    * Mapping must return -1 for GenericPartition nodes
    Argument 1 (optional): partition_dir
    * Manually define where to save the partition models
    """

    def __init__(self, partitioning=lambda node: -1, partition_dir=None):
        super().__init__()
        self.partitioning = partitioning
        self.partition_dir = partition_dir

    def apply(self, model):
        # identify partitions to create
        original_nodes = list(model.graph.node)
        print("original nodes", original_nodes)
        partition_ids = set(list(map(self.partitioning, original_nodes)))
        partition_ids.discard(-1)
        
        print("partition ids",partition_ids)

        # prepare dir for generated .onnx models
        if self.partition_dir is None:
            self.partition_dir = make_build_dir("partitioning_")
        else:
            pathlib.Path(self.partition_dir).mkdir(parents=True, exist_ok=True)

        for partition_id in partition_ids:
            all_nodes = list(model.graph.node)
            partition_nodes = list(
                filter(lambda x: self.partitioning(x) == partition_id, all_nodes)
            )
            non_partition_nodes = list(
                filter(lambda x: x not in partition_nodes, all_nodes)
            )

            # partition the model into two models
            p_model = copy.deepcopy(model)
            non_p_model = model
            # remove all non-partition nodes from the partition model
            for node_to_remove in non_partition_nodes:
                p_model.graph.node.remove(node_to_remove)

            # identify the entry and exit points for the partition part
            p_in = []
            p_out = []
            p_start_ind = 0
            for node in p_model.graph.node:
                for in_tensor in node.input:
                    # check if producer has been removed = lies outside the partition
                    has_initializer = in_tensor in [
                        x.name for x in p_model.graph.initializer
                    ]
                    has_producer = p_model.find_producer(in_tensor) is not None
                    if not has_initializer and not has_producer:
                        # the same tensor could feed multiple nodes within the partition
                        # (e.g. for residual connections), so we avoid duplicates
                        if in_tensor not in p_in:
                            p_in.append(in_tensor)
                        # keep track of where this partition starts topologically
                        if p_start_ind == 0:
                            p_start_ind = all_nodes.index(node)
                for out_tensor in node.output:
                    # check if tensor is top-level output
                    # or has a consumer outside the partition
                    if out_tensor in [x.name for x in model.graph.output]:
                        if out_tensor not in p_out:
                            p_out.append(out_tensor)
                    else:
                        for consumer in model.find_consumers(out_tensor):
                            if self.partitioning(consumer) != partition_id:
                                if out_tensor not in p_out:
                                    p_out.append(out_tensor)

            p_in_vi = list(map(lambda x: p_model.get_tensor_valueinfo(x), p_in))
            p_out_vi = list(map(lambda x: p_model.get_tensor_valueinfo(x), p_out))

            # check if partitioning is legal (i.e. creates no cycles)
            to_check = [model.find_producer(x) for x in p_in]
            while len(to_check) > 0:
                next_to_check = []
                for node in to_check:
                    if node is not None:
                        assert (
                            self.partitioning(node) != partition_id
                        ), """cycle-free graph violated: partition depends on itself"""
                        # print(node)
                        predecessors = model.find_direct_predecessors(node)
                        if predecessors is not None:
                            next_to_check.extend(predecessors)
                to_check = next_to_check

            # set p graph in/out to be p_in/p_out
            while len(p_model.graph.input) > 0:
                p_model.graph.input.pop()
            for i in p_in_vi:
                p_model.graph.input.append(i)

            while len(p_model.graph.output) > 0:
                p_model.graph.output.pop()
            for o in p_out_vi:
                p_model.graph.output.append(o)

            # remove redundant input and output value_info entries
            for i in p_in_vi:
                if i in p_model.graph.value_info:
                    p_model.graph.value_info.remove(i)

            for o in p_out_vi:
                if o in p_model.graph.value_info:
                    p_model.graph.value_info.remove(o)

            # save partition model
            p_model_filename = (
                self.partition_dir + "/partition_" + str(partition_id) + ".onnx"
            )
            p_model.cleanup()
            p_model.save(p_model_filename)

            # insert GenericPartition node
            p_node = helper.make_node(
                "GenericPartition",
                p_in,
                p_out,
                name="GenericPartition_" + str(partition_id),
                # use the model attribute to mark the partition model
                model=p_model_filename,
                domain="finn.custom_op.general",
            )
            non_p_model.graph.node.insert(p_start_ind, p_node)

            # remove all partition nodes from the parent model
            # do this after inserting the p_node for easier p_start_ind handling
            for node_to_remove in partition_nodes:
                non_p_model.graph.node.remove(node_to_remove)

            model = non_p_model

        return (model, False)

class CreateDataflowPartition(Transformation):
    """Split a graph into two graphs; one which contains non-FINN-dataflow nodes
    and a StreamingDataflowPartition node, and another which only contains
    FINN dataflow nodes. The StreamingDataflowPartition has a model attribute
    that indicates the filename for the second graph that only contains
    dataflow nodes. No action is taken if there are no dataflow nodes."""

    def __init__(self, partition_model_dir=None):
        super().__init__()
        if partition_model_dir is None:
            self.partition_model_dir = make_build_dir("dataflow_partition_")
        else:
            self.partition_model_dir = partition_model_dir

    def apply(self, model):
        def filter_fc_extw(x):
            if x.op_type == "IODMA":
                burst_mode = get_by_name(x.attribute, "burstMode")
                if burst_mode is not None:
                    burst_mode = burst_mode.s.decode("UTF-8")
                    return burst_mode == "wrap"

        extw_dma_nodes = list(filter(filter_fc_extw, model.graph.node))
        if len(extw_dma_nodes) > 0:
            print("dma nodes present")
            model = model.transform(ExternalizeParams())

        def assign_partition_id(node):
            if node.op_type in ["GenericPartition", "StreamingDataflowPartition"]:
                return -1
            else:
                backend = get_by_name(node.attribute, "backend")
                print("backend", backend)
                if backend is not None and backend.s.decode("UTF-8") == "fpgadataflow":
                    assigned_partition = get_by_name(node.attribute, "partition_id")
                    if assigned_partition is not None:
                        return assigned_partition.i
                    else:
                        return 0
                else:
                    return -1

        # first, use the generic partitioning functionality to split up the graph
        parent_model = model.transform(
            PartitionFromLambda(
                partitioning=assign_partition_id, partition_dir=self.partition_model_dir
            )
        )
        # change node types to StreamingDataflowPartition
        p_nodes = parent_model.get_nodes_by_op_type("GenericPartition")
        for partition_ind, p_node in enumerate(p_nodes):
            # go into partition to extract some info
            p_node_inst = getCustomOp(p_node)
            node_model_filename = p_node_inst.get_nodeattr("model")
            p_model = ModelWrapper(node_model_filename)
            # check floorplan (SLR assignment per node)
            inst = getCustomOp(p_model.graph.node[0])
            slr = inst.get_nodeattr("slr")
            for node in p_model.graph.node:
                inst = getCustomOp(node)
                assert slr == inst.get_nodeattr(
                    "slr"
                ), """all nodes with same partition_id must have the same slr id"""
            # check that there is only one non-null mem_port per partition
            nmemports = 0
            mem_port = ""
            for node in p_model.graph.node:
                inst = getCustomOp(node)
                port = inst.get_nodeattr("mem_port")
                if port is not None and port != "":
                    nmemports += 1
                    mem_port = port
            assert nmemports <= 1, """Too many memory ports per partition"""
            # done, change node type and add info in parent graph
            p_node.op_type = "StreamingDataflowPartition"
            p_node.domain = "finn.custom_op.fpgadataflow"
            new_p_node_inst = getCustomOp(p_node)
            new_p_node_inst.set_nodeattr("partition_id", partition_ind)
            new_p_node_inst.set_nodeattr("slr", slr)
            new_p_node_inst.set_nodeattr("mem_port", mem_port)

        return (parent_model, False)
    


In [142]:
#from finn.transformation.fpgadataflow.create_dataflow_partition import (
#    CreateDataflowPartition,
#)
parent_model = model.transform(
        CreateDataflowPartition(
        )
    )
parent_model.save("u250_encoder" + "/intermediate_models/dataflow_parent.onnx")
sdp_nodes = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")

original nodes [input: "global_in"
input: "MultiThreshold_0_param0"
output: "MultiThreshold_0_out0"
name: "MultiThreshold_0"
op_type: "MultiThreshold"
attribute {
  name: "out_dtype"
  s: "INT8"
  type: STRING
}
attribute {
  name: "out_bias"
  f: -128.0
  type: FLOAT
}
doc_string: ""
domain: "finn.custom_op.general"
, input: "MultiThreshold_0_out0"
input: "Mul_0_param0"
output: "Mul_0_out0"
name: "Mul_0"
op_type: "Mul"
doc_string: "/home/roba/.conda/envs/roba_jupyter/lib/python3.9/site-packages/torch/nn/functional.py(1169): dropout\n/home/roba/.conda/envs/roba_jupyter/lib/python3.9/site-packages/torch/nn/modules/dropout.py(58): forward\n/home/roba/.conda/envs/roba_jupyter/lib/python3.9/site-packages/torch/nn/modules/module.py(1090): _slow_forward\n/home/roba/.conda/envs/roba_jupyter/lib/python3.9/site-packages/torch/nn/modules/module.py(1102): _call_impl\n/tmp/ipykernel_8514/1750020267.py(101): forward\n/home/roba/.conda/envs/roba_jupyter/lib/python3.9/site-packages/torch/nn/modules/m

AssertionError: cycle-free graph violated: partition depends on itself

In [143]:
len(sdp_nodes)

0

Stopping http://0.0.0.0:8081
Serving 'u250_encoder/intermediate_models/dataflow_parent.onnx' at http://0.0.0.0:8081


In [11]:
!top 

[?1h=[H[2J[mtop - 17:34:07 up 56 days,  3:33,  0 users,  load average: 162.21, 162.23, 162.2[m[m[m[m[K
Tasks:[m[m[1m  20 [m[mtotal,[m[m[1m   1 [m[mrunning,[m[m[1m  19 [m[msleeping,[m[m[1m   0 [m[mstopped,[m[m[1m   0 [m[mzombie[m[m[m[m[K
%Cpu(s):[m[m[1m  9.7 [m[mus,[m[m[1m  7.8 [m[msy,[m[m[1m  0.1 [m[mni,[m[m[1m 82.3 [m[mid,[m[m[1m  0.0 [m[mwa,[m[m[1m  0.0 [m[mhi,[m[m[1m  0.0 [m[msi,[m[m[1m  0.0 [m[mst[m[m[m[m[K
KiB Mem :[m[m[1m 13167875+[m[mtotal,[m[m[1m 86194048 [m[mfree,[m[m[1m 31800628 [m[mused,[m[m[1m 13684080 [m[mbuff/cache[m[m[m[m[K
KiB Swap:[m[m[1m  8388604 [m[mtotal,[m[m[1m   442560 [m[mfree,[m[m[1m  7946044 [m[mused.[m[m[1m 96918744 [m[mavail Mem [m[m[m[m[K
[K
[7m  PID USER      PR  NI    VIRT    RES    SHR S  %CPU %MEM     TIME+ COMMAND     [m[m[K
[m 5626 neni      20   0 1452748 715640 104740 S 100.0  0.5  21:37.49 vivado_hls  [m[m[K

[H[mtop - 17:34:25 up 56 days,  3:33,  0 users,  load average: 162.24, 162.24, 162.2[m[m[m[m[K

%Cpu(s):[m[m[1m 62.1 [m[mus,[m[m[1m 37.8 [m[msy,[m[m[1m  0.0 [m[mni,[m[m[1m  0.0 [m[mid,[m[m[1m  0.0 [m[mwa,[m[m[1m  0.0 [m[mhi,[m[m[1m  0.0 [m[msi,[m[m[1m  0.0 [m[mst[m[m[m[m[K
KiB Mem :[m[m[1m 13167875+[m[mtotal,[m[m[1m 86821472 [m[mfree,[m[m[1m 31173148 [m[mused,[m[m[1m 13684132 [m[mbuff/cache[m[m[m[m[K
KiB Swap:[m[m[1m  8388604 [m[mtotal,[m[m[1m   442560 [m[mfree,[m[m[1m  7946044 [m[mused.[m[m[1m 97546208 [m[mavail Mem [m[m[m[m[K
[K

[m 5626 neni      20   0 1452748 715640 104740 S  99.3  0.5  21:55.48 vivado_hls  [m[m[K
[m 5955 neni      20   0 6163304 336488 161068 S   1.0  0.3   0:07.42 python      [m[m[K
[m    7 neni      20   0  268184  54156   7460 S   0.3  0.0   4:02.64 jupyter-no+ [m[m[K

[m  455 root      20   0   18616      4      4 S   0.0  0.0   0:00.11 bash    