In [51]:
import onnx
from finn.util.basic import make_build_dir
from finn.util.visualization import showSrc, showInNetron

import torch

from finn.util.test import get_test_model_trained
import brevitas.onnx as bo
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs
from torch.nn import Module, ModuleList, BatchNorm2d, MaxPool2d, BatchNorm1d
from QuantLeNet import *
from brevitas.nn import QuantConv2d, QuantIdentity, QuantLinear
from brevitas.core.restrict_val import RestrictValueType
from brevitas_examples.bnn_pynq.models.common import CommonWeightQuant, CommonActQuant
from brevitas.core.restrict_val import RestrictValueType
from brevitas_examples.bnn_pynq.models.tensor_norm import TensorNorm

from finn.transformation.streamline import Streamline
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants
from finn.transformation.infer_data_layouts import InferDataLayouts
from finn.transformation.general import RemoveUnusedTensors

import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.fpgadataflow.set_folding import SetFolding
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from finn.custom_op.registry import getCustomOp
from finn.transformation.infer_data_layouts import InferDataLayouts


import netron

def stopit(port=8081, host="0.0.0.0"):
    """Stop the Netron server used by the `showInNetron` calls in this notebook.

    Args:
        port: Port of the Netron server to stop. Defaults to 8081, the port that
            appears in this notebook's "Serving ... at http://0.0.0.0:8081" output.
        host: Host address of the Netron server to stop. Defaults to "0.0.0.0".

    Returns:
        Whatever `netron.stop` returns.
    """
    return netron.stop(port, host)

from brevitas.core.scaling import ScalingImplType
from brevitas.core.stats import StatsOp
from brevitas.nn import QuantReLU
from brevitas.core.quant import QuantType

# Set `REPLICATE_ERROR` to <font color='red'>True</font>
to replicate the Dataflow Partitioning error. With the parameters of our custom net, Dataflow Partitioning splits the parent and child models into multiple pieces.

In [70]:
# Selects which of the two configurations below is built:
#   True  -> custom MNIST-sized net that reproduces the dataflow partitioning problem
#   False -> reference CIFAR-sized CNV
REPLICATE_ERROR = True

if REPLICATE_ERROR:
    # We use the default CNV as shown on
    # https://github.com/Xilinx/brevitas/blob/203c26f50a2074b4193b40c614766fadd761f7ee/brevitas_examples/bnn_pynq/models/CNV.py
    #
    # This default model has been modified to run on MNIST images.

    # MNIST parameters
    IN_CH, IMG_HEIGHT, IMG_WIDTH = 1, 28, 28

    # CNV model parameters.
    # One (out_channels, is_pool_enabled) pair per convolutional layer.
    # Previously tried alternative, kept for reference: [(6, True), (16, True)]
    CNV_OUT_CH_POOL = [(16, False), (16, True), (32, True), (64, False), (64, False)]

    # One (in_features, out_features) pair per intermediate fully connected layer.
    # Previously tried alternative, kept for reference: [(16*4*4, 120), (120, 84)]
    # NOTE(review): the first in_features has to equal the flattened output of the
    # conv stack; presumably 64 channels * 1 * 1 for a 28x28 input - TODO confirm.
    INTERMEDIATE_FC_FEATURES = [(64, 120), (120, 84)]

    # Input width of the final (classifier) fully connected layer.
    LAST_FC_IN_FEATURES = 84
    # Pooling window and convolution kernel sizes.
    POOL_SIZE = 2
    KERNEL_SIZE = 3

    # Bit widths for the input, the weights and the activations.
    INPUT_WIDTH, WEIGHT_WIDTH, ACT_WIDTH = 8, 2, 2

    # Prefix of every ONNX file written by this notebook.
    MODEL_PREFIX = "error_model"

else:
    # We use the default CNV as shown on
    # https://github.com/Xilinx/brevitas/blob/203c26f50a2074b4193b40c614766fadd761f7ee/brevitas_examples/bnn_pynq/models/CNV.py
    # This model runs on CIFAR images.

    # CIFAR parameters
    IN_CH, IMG_HEIGHT, IMG_WIDTH = 3, 32, 32

    # CNV model parameters.
    # One (out_channels, is_pool_enabled) pair per convolutional layer.
    CNV_OUT_CH_POOL = [(64, False), (64, True), (128, False), (128, True), (256, False), (256, False)]
    # One (in_features, out_features) pair per intermediate fully connected layer.
    INTERMEDIATE_FC_FEATURES = [(256, 512), (512, 512)]
    # Input width of the final (classifier) fully connected layer.
    LAST_FC_IN_FEATURES = 512
    # Pooling window and convolution kernel sizes.
    POOL_SIZE = 2
    KERNEL_SIZE = 3

    # Bit widths for the input, the weights and the activations.
    INPUT_WIDTH, WEIGHT_WIDTH, ACT_WIDTH = 8, 4, 2

    # Prefix of every ONNX file written by this notebook.
    MODEL_PREFIX = "working_model"

# Sanity check: every fully connected layer must accept exactly what the previous
# one emits, including the final classifier layer.
_fc_out_features = [n_out for _, n_out in INTERMEDIATE_FC_FEATURES]
_fc_next_in_features = [n_in for n_in, _ in INTERMEDIATE_FC_FEATURES[1:]] + [LAST_FC_IN_FEATURES]
assert not _fc_out_features or _fc_out_features == _fc_next_in_features, (
    "INTERMEDIATE_FC_FEATURES / LAST_FC_IN_FEATURES do not chain: "
    f"outputs {_fc_out_features} vs. following inputs {_fc_next_in_features}"
)
    

In [71]:
# NOTE(review): not referenced by any code visible in this notebook; presumably carried
# over from the upstream brevitas CNV definition - confirm before removing.
LAST_FC_PER_OUT_CH_SCALING = False

class CNV(Module):
    """Quantized convolutional network assembled from the notebook's config constants.

    The layer stack is driven by the module-level `CNV_OUT_CH_POOL`,
    `INTERMEDIATE_FC_FEATURES`, `LAST_FC_IN_FEATURES`, `KERNEL_SIZE` and `POOL_SIZE`
    values, which must be defined before the class is instantiated.
    """

    def __init__(self, num_classes, weight_bit_width, act_bit_width, in_bit_width, in_ch):
        """Build the convolutional and fully connected layer lists.

        Args:
            num_classes: Number of outputs of the final fully connected layer.
            weight_bit_width: Bit width passed to every QuantConv2d / QuantLinear.
            act_bit_width: Bit width of the QuantIdentity placed after each BatchNorm.
            in_bit_width: Bit width of the QuantIdentity applied to the network input.
            in_ch: Number of channels of the input image.
        """
        super().__init__()

        self.conv_features = ModuleList()
        self.linear_features = ModuleList()

        # Input quantizer (Q1.7 input format): values in [-1, 1 - 2^-7].
        self.conv_features.append(QuantIdentity(
            act_quant=CommonActQuant,
            bit_width=in_bit_width,
            min_val=- 1.0,
            max_val=1.0 - 2.0 ** (-7),
            narrow_range=False,
            restrict_scaling_type=RestrictValueType.POWER_OF_TWO))

        # Conv -> BatchNorm -> activation quantizer, optionally followed by max-pooling.
        for out_ch, is_pool_enabled in CNV_OUT_CH_POOL:
            self.conv_features.append(QuantConv2d(
                kernel_size=KERNEL_SIZE,
                in_channels=in_ch,
                out_channels=out_ch,
                bias=False,
                weight_quant=CommonWeightQuant,
                weight_bit_width=weight_bit_width))
            # The next conv layer consumes what this one produces.
            in_ch = out_ch
            self.conv_features.append(BatchNorm2d(in_ch, eps=1e-4))
            self.conv_features.append(QuantIdentity(
                act_quant=CommonActQuant,
                bit_width=act_bit_width))
            if is_pool_enabled:
                # Use the configured window size instead of a hard-coded 2 so that
                # POOL_SIZE in the config cell actually takes effect.
                self.conv_features.append(MaxPool2d(kernel_size=POOL_SIZE))

        # Linear -> BatchNorm -> activation quantizer for each intermediate FC layer.
        for in_features, out_features in INTERMEDIATE_FC_FEATURES:
            self.linear_features.append(QuantLinear(
                in_features=in_features,
                out_features=out_features,
                bias=False,
                weight_quant=CommonWeightQuant,
                weight_bit_width=weight_bit_width))
            self.linear_features.append(BatchNorm1d(out_features, eps=1e-4))
            self.linear_features.append(QuantIdentity(
                act_quant=CommonActQuant,
                bit_width=act_bit_width))

        # Classifier head: final quantized linear layer followed by TensorNorm.
        self.linear_features.append(QuantLinear(
            in_features=LAST_FC_IN_FEATURES,
            out_features=num_classes,
            bias=False,
            weight_quant=CommonWeightQuant,
            weight_bit_width=weight_bit_width))
        self.linear_features.append(TensorNorm())

        # Initialise all quantized conv / linear weights uniformly in [-1, 1].
        for m in self.modules():
            if isinstance(m, (QuantConv2d, QuantLinear)):
                torch.nn.init.uniform_(m.weight.data, -1, 1)


    def clip_weights(self, min_val, max_val):
        """Clamp, in place, the weights of all quantized conv and linear layers.

        Args:
            min_val: Lower bound passed to `clamp_`.
            max_val: Upper bound passed to `clamp_`.
        """
        for mod in self.conv_features:
            if isinstance(mod, QuantConv2d):
                mod.weight.data.clamp_(min_val, max_val)
        for mod in self.linear_features:
            if isinstance(mod, QuantLinear):
                mod.weight.data.clamp_(min_val, max_val)

    def forward(self, x):
        """Run the network on a batch `x` and return the output of the last layer.

        The input is first mapped to `2 * x - 1`, then passed through the conv
        stack, flattened to (batch, -1), and passed through the linear stack.
        """
        # presumably x is in [0, 1] so that this rescales it to [-1, 1] - TODO confirm
        x = 2.0 * x - torch.tensor([1.0], device=x.device)
        for mod in self.conv_features:
            x = mod(x)
        # Flatten everything except the batch dimension for the FC layers.
        x = x.view(x.shape[0], -1)
        for mod in self.linear_features:
            x = mod(x)
        return x

def cnv(in_bit_width, weight_bit_width, act_bit_width, num_classes, in_channels):
    """Factory for `CNV`: forwards the bit widths, class count and input channel count.

    Args:
        in_bit_width: Bit width of the input quantizer.
        weight_bit_width: Bit width of the conv / linear weights.
        act_bit_width: Bit width of the activation quantizers.
        num_classes: Number of outputs of the final layer.
        in_channels: Number of input image channels (passed to `CNV` as `in_ch`).

    Returns:
        The newly constructed `CNV` instance.
    """
    return CNV(
        num_classes=num_classes,
        weight_bit_width=weight_bit_width,
        act_bit_width=act_bit_width,
        in_bit_width=in_bit_width,
        in_ch=in_channels,
    )

In [72]:
# Instantiate the network from the configuration constants, with 10 output classes.
model = cnv(
    in_bit_width=INPUT_WIDTH,
    weight_bit_width=WEIGHT_WIDTH,
    act_bit_width=ACT_WIDTH,
    num_classes=10,
    in_channels=IN_CH,
)
# Alternative model, currently disabled:
# from QuantLeNet import QuantLeNet
# model = QuantLeNet(INPUT_WIDTH, WEIGHT_WIDTH, ACT_WIDTH)

In [73]:
import os

# Directory that receives every ONNX checkpoint written by this notebook.
build_dir = './onnx'
# Nothing else in the visible notebook creates this directory, so make sure it
# exists before the first file is written into it.
os.makedirs(build_dir, exist_ok=True)

# Export the Brevitas model to FINN-ONNX. The dummy input shape follows the
# PyTorch NCHW convention: (batch, channels, height, width).
bo.export_finn_onnx(model, (1, IN_CH, IMG_HEIGHT, IMG_WIDTH), build_dir + f"/{MODEL_PREFIX}_export.onnx")

# From here on `model` is the FINN ModelWrapper around the exported graph,
# no longer the torch module.
model = ModelWrapper(build_dir + f"/{MODEL_PREFIX}_export.onnx")
# Tidy-up transformations on the freshly exported graph.
model = model.transform(InferShapes())
model = model.transform(FoldConstants())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(RemoveStaticGraphInputs())
model.save(build_dir + f"/{MODEL_PREFIX}_tidy.onnx")

showInNetron(build_dir+f"/{MODEL_PREFIX}_tidy.onnx")






Stopping http://0.0.0.0:8081
Serving './onnx/error_model_tidy.onnx' at http://0.0.0.0:8081


In [74]:

##################################################################
from finn.util.pytorch import ToTensor
from finn.transformation.merge_onnx_models import MergeONNXModels
from finn.core.datatype import DataType

# Reload the tidied model and look up the name and shape of its first graph input;
# the shape is reused below as the export shape of the preprocessing model.
model = ModelWrapper(build_dir+f"/{MODEL_PREFIX}_tidy.onnx")
global_inp_name = model.graph.input[0].name
ishape = model.get_tensor_shape(global_inp_name)

# preprocessing: torchvision's ToTensor divides uint8 inputs by 255
# The preprocessing step is exported as its own small ONNX model.
totensor_pyt = ToTensor()
chkpt_preproc_name = build_dir+f"/{MODEL_PREFIX}_preproc.onnx"
bo.export_finn_onnx(totensor_pyt, ishape, chkpt_preproc_name)

# join preprocessing and core model
pre_model = ModelWrapper(chkpt_preproc_name)
model = model.transform(MergeONNXModels(pre_model))

# add input quantization annotation: UINT8 for all BNN-PYNQ models
# The input name is looked up again on the merged model (presumably because the
# merge changes the graph's first input - TODO confirm).
global_inp_name = model.graph.input[0].name
model.set_tensor_datatype(global_inp_name, DataType.UINT8)


In [75]:

##################################################################
from finn.transformation.insert_topk import InsertTopK
from finn.transformation.infer_datatypes import InferDataTypes

# Checkpoint written (and displayed) at the end of this cell.
chkpt_name = build_dir+f"/{MODEL_PREFIX}_pre_post.onnx"

# Postprocessing: append a Top-1 node to the end of the graph.
model = model.transform(InsertTopK(k=1))

# Tidy-up passes, applied in this exact order.
tidy_up_passes = [
    InferShapes(),
    FoldConstants(),
    GiveUniqueNodeNames(),
    GiveReadableTensorNames(),
    InferDataTypes(),
    RemoveStaticGraphInputs(),
]
for tidy_up_pass in tidy_up_passes:
    model = model.transform(tidy_up_pass)

model.save(chkpt_name)

showInNetron(chkpt_name)


Stopping http://0.0.0.0:8081
Serving './onnx/error_model_pre_post.onnx' at http://0.0.0.0:8081


In [76]:
# Streamlining: reload the pre/post-processed checkpoint and apply the
# streamlining / lowering passes below. The order of the passes matters.
model = ModelWrapper(build_dir + f"/{MODEL_PREFIX}_pre_post.onnx")
model = model.transform(MoveScalarLinearPastInvariants())
model = model.transform(Streamline())
model = model.transform(LowerConvsToMatMul())
model = model.transform(MakeMaxPoolNHWC())
model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())

# NOTE(review): the earlier comment here said this step is skipped because the
# quantization width is greater than 1, but the transformation IS applied.
# Presumably it leaves the graph untouched when there are no bipolar MatMuls -
# TODO confirm, or remove the call.
model = model.transform(ConvertBipolarMatMulToXnorPopcount())

model = model.transform(Streamline())
# absorb final add-mul nodes into TopK
model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
model = model.transform(InferDataLayouts())
model = model.transform(RemoveUnusedTensors())
# Save the streamlined checkpoint and open it in Netron.
model.save(build_dir + f"/{MODEL_PREFIX}_streamlined.onnx")
showInNetron(build_dir+f"/{MODEL_PREFIX}_streamlined.onnx")


Stopping http://0.0.0.0:8081
Serving './onnx/error_model_streamlined.onnx' at http://0.0.0.0:8081


In [None]:
# import pdb; pdb.pm()

# DataFlow Partitioning Splits Model

In [77]:
# choose the memory mode for the MVTU units, decoupled or const
mem_mode = "decoupled"

model = ModelWrapper(build_dir + f"/{MODEL_PREFIX}_streamlined.onnx")

# NOTE(review): the earlier comment here said the binary streaming FC conversion is
# not done because this is not a BNN, but the transformation IS applied, followed
# by the quantized variant. Presumably it is a no-op for this model - TODO confirm.
model = model.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode))
model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))


# TopK to LabelSelect
model = model.transform(to_hls.InferLabelSelectLayer())
# input quantization (if any) to standalone thresholding
model = model.transform(to_hls.InferThresholdingLayer())
model = model.transform(to_hls.InferConvInpGen())
model = model.transform(to_hls.InferStreamingMaxPool())
# get rid of Reshape(-1, 1) operation between hlslib nodes
model = model.transform(RemoveCNVtoFCFlatten())
# get rid of Transpose -> Transpose identity sequences
model = model.transform(absorb.AbsorbConsecutiveTransposes())
# infer tensor data layouts: currently disabled, see the commented-out call below


# model = model.transform(InferDataLayouts())
# Partition the graph; the returned parent model is saved and shown in Netron.
parent_model = model.transform(CreateDataflowPartition())
parent_model.save(build_dir + f"/{MODEL_PREFIX}_dataflow_parent.onnx")
showInNetron(build_dir + f"/{MODEL_PREFIX}_dataflow_parent.onnx")



Stopping http://0.0.0.0:8081
Serving './onnx/error_model_dataflow_parent.onnx' at http://0.0.0.0:8081


In [81]:
# Collect every StreamingDataflowPartition node of the parent model. Only the first
# one is inspected below, so say so explicitly when partitioning produced several
# pieces instead of silently ignoring the rest.
sdp_nodes = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")
if len(sdp_nodes) > 1:
    print(
        f"WARNING: found {len(sdp_nodes)} StreamingDataflowPartition nodes; "
        "only the first one is used below."
    )
sdp_node = getCustomOp(sdp_nodes[0])
# The "model" attribute holds the file name of the partition's own ONNX model.
dataflow_model_filename = sdp_node.get_nodeattr("model")
# save the dataflow partition with a different name for easier access
dataflow_model = ModelWrapper(dataflow_model_filename)
dataflow_model.save(build_dir + f"/{MODEL_PREFIX}_dataflow_model.onnx")
showInNetron(build_dir + f"/{MODEL_PREFIX}_dataflow_model.onnx")


Stopping http://0.0.0.0:8081
Serving './onnx/error_model_dataflow_model.onnx' at http://0.0.0.0:8081


In [90]:
# Stop the Netron server (port 8081 on 0.0.0.0) via the `stopit` helper from the imports cell.
stopit()


Stopping http://0.0.0.0:8081


In [108]:
# Auto-folding (SetFolding) did not succeed: it resulted in an error further
# downstream, so the folding factors are set by hand in this cell.
# from finn.transformation.fpgadataflow.set_folding import SetFolding

model = ModelWrapper(build_dir + f"/{MODEL_PREFIX}_dataflow_model.onnx")

fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")

# One (PE, SIMD, inFIFODepth) entry per StreamingFCLayer_Batch node, matched to the
# nodes by position in `fc_layers`.
folding = [
    (16, 1, 128),
    (16, 16, 128),
    (16, 16, 128),
    (16, 16, 128),
    (1, 16, 2),
    (1, 4, 2),
    (1, 8, 128),
    (5, 1, 1),
]

# zip() stops at the shorter sequence, so a mismatch would otherwise leave layers
# unfolded (or folding entries unused) without any indication.
if len(fc_layers) != len(folding):
    print(
        f"WARNING: {len(fc_layers)} StreamingFCLayer_Batch nodes but "
        f"{len(folding)} folding entries; only the first "
        f"{min(len(fc_layers), len(folding))} are paired up."
    )

for fcl, (pe, simd, ififodepth) in zip(fc_layers, folding):
    fcl_inst = getCustomOp(fcl)
    fcl_inst.set_nodeattr("PE", pe)
    fcl_inst.set_nodeattr("SIMD", simd)
    fcl_inst.set_nodeattr("inFIFODepth", ififodepth)

# use same SIMD values for the sliding window operators
swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator")
for i, swg_layer in enumerate(swg_layers):
    swg_inst = getCustomOp(swg_layer)
    # The i-th sliding window generator reuses the SIMD of the i-th folding entry.
    simd = folding[i][1]
    swg_inst.set_nodeattr("SIMD", simd)

model = model.transform(GiveUniqueNodeNames())

# Disabled auto-folding alternative:
# folded_model = dataflow_model.transform(SetFolding())
model.save(build_dir + f"/{MODEL_PREFIX}_folded.onnx")
showInNetron(build_dir + f"/{MODEL_PREFIX}_folded.onnx")


Stopping http://0.0.0.0:8081
Serving './onnx/error_model_folded.onnx' at http://0.0.0.0:8081


In [109]:
# Target platform and clock period (in ns) passed to ZynqBuild below.
test_pynq_board = "Pynq-Z1"
target_clk_ns = 10

from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
# Reload the folded model, run the Zynq build on it and save the result.
# NOTE(review): the recorded output of this cell ends with "and no default value
# was set", so this run presumably raised inside the build - TODO confirm.
model = ModelWrapper(build_dir+ f"/{MODEL_PREFIX}_folded.onnx")
model = model.transform(ZynqBuild(platform = test_pynq_board, period_ns = target_clk_ns))
model.save(build_dir +  f"/{MODEL_PREFIX}_synth.onnx")

  + "and no default value was set"


In [105]:
# NOTE(review): leftover post-mortem debugging call; it only works right after a cell
# has raised and blocks "Run All" waiting for input - remove before sharing.
import pdb; pdb.pm()

> /opt/conda/lib/python3.6/site-packages/IPython/core/compilerop.py(100)ast_parse()
-> return compile(source, filename, symbol, self.flags | PyCF_ONLY_AST, 1)
(Pdb) list
 95  	    def ast_parse(self, source, filename='<unknown>', symbol='exec'):
 96  	        """Parse code to an AST with the current compiler flags active.
 97  	
 98  	        Arguments are exactly the same as ast.parse (in the standard library),
 99  	        and are passed to the built-in compile function."""
100  ->	        return compile(source, filename, symbol, self.flags | PyCF_ONLY_AST, 1)
101  	
102  	    def reset_compiler_flags(self):
103  	        """Reset compiler flags to default state."""
104  	        # This value is copied from codeop.Compile.__init__, so if that ever
105  	        # changes, it will need to be updated.
(Pdb) new_shape
*** NameError: name 'new_shape' is not defined
(Pdb) self
<IPython.core.compilerop.CachingCompiler object at 0x7faa02c99a90>
(Pdb) quit
