In [1]:
import time
import os
import json
import pathlib
import pytest
import numpy as np

import finn.core.onnx_exec as oxe
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.general import RemoveStaticGraphInputs
from finn.transformation.infer_shapes import InferShapes
#import brevitas_examples.speech_to_text as stt

from finn.custom_op.registry import getCustomOp
from finn.util.test import (
    load_test_checkpoint_or_skip
)
from finn.core.modelwrapper import ModelWrapper
from finn.core.datatype import DataType
from finn.util.basic import get_by_name

from finn.transformation.change_3d_tensors_to_4d import Change3DTo4DTensors
from finn.transformation.general import (
    GiveUniqueNodeNames,
    GiveRandomTensorNames,
    GiveReadableTensorNames,
    GiveUniqueParameterTensors
)
from finn.transformation.batchnorm_to_affine import BatchNormToAffine
from finn.transformation.streamline.reorder import (
    MoveAddPastMul,
    MoveAddPastConv,
    MoveMulPastFork,
    MoveScalarMulPastConv,
    MoveMulPastDWConv,
    MoveLinearPastEltwiseAdd
)
from finn.transformation.streamline.collapse_repeated import(
    CollapseRepeatedAdd,
    CollapseRepeatedMul
)
from finn.transformation.streamline.absorb import(
    AbsorbAddIntoMultiThreshold,
    AbsorbMulIntoMultiThreshold,
    FactorOutMulSignMagnitude,
    Absorb1BitMulIntoConv,
    AbsorbSignBiasIntoMultiThreshold
)
from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.create_generic_partitions import PartitionFromDict
from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.streamline.absorb import AbsorbTransposeIntoMultiThreshold
from finn.transformation.streamline.reorder import (
    MoveTransposePastMultiThreshold,
    MoveTransposePastJoinAdd,
    MoveTransposeBeforeFork
)
from finn.transformation.extend_partition import ExtendPartition
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
from finn.transformation.fpgadataflow.create_dataflow_partition import CreateDataflowPartition
from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
    ReplaceVerilogRelPaths,
)
from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
from finn.transformation.fpgadataflow.set_folding import SetFolding
from finn.util.basic import alveo_part_map, alveo_default_platform
from finn.util.config import extract_model_config_to_json
from finn.transformation.fpgadataflow.set_fifo_depths import (
    InsertAndSetFIFODepths,
    RemoveShallowFIFOs
)
from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
from finn.analysis.fpgadataflow.res_estimation import (
    res_estimation,
    res_estimation_complete
)
from finn.analysis.fpgadataflow.op_and_param_counts import (
    aggregate_dict_keys,
    op_and_param_counts
)
from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.core.throughput_test import throughput_test_rtlsim
from copy import deepcopy
from finn.transformation.fpgadataflow.vitis_build import (
    VitisOptStrategy,
    VitisBuild
)
from finn.transformation.general import ApplyConfig

No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'


In [2]:
# List the files and directories stored under the syn_baseline_v2 results folder.
!ls /workspace/results/syn_baseline_v2/

dataflow_partition0_6ozjxmlt		    end2end_quartznet_streamline.onnx
dataflow_partition0_w509dzku		    end2end_quartznet_tidy.onnx
dataflow_partition1_2uh492jg		    folding_config.json
dataflow_partition2_i_cp5_2l		    folding_config_fifos.json
end2end_quartznet_dataflow_partition.onnx   librispeech_data
end2end_quartznet_export_dev.onnx	    outputs_test.npy
end2end_quartznet_fifos.onnx		    partitioning_lowering
end2end_quartznet_folded.onnx		    partitioning_repartition
end2end_quartznet_hls_layers.onnx	    qce_alveo_v2
end2end_quartznet_lowered.onnx		    qce_alveo_v2.zip
end2end_quartznet_lowered_partitioned.onnx  vitis_link_proj_qce_alveo_v2.zip


In [5]:
# List the scripts and saved timing arrays (.npy) in the brevitas_benchmark folder.
!ls /workspace/results/brevitas_benchmark/

quartznet.py  quartznet_val.py	time_cpu_bs_100.npy  time_cpu_bs_250.npy


In [24]:
# Load the timing records saved by the CPU benchmark.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects and can
# execute code while loading; only use it on files from a trusted source.
a = np.load("/workspace/results/brevitas_benchmark/time_cpu_bs_100.npy", allow_pickle=True)

print(np.shape(a))

# Sum the timings over every recorded batch. Each record is indexed as
#   batch[0]    -> value accumulated into `total`
#   batch[1][k] -> per-stage values accumulated into pre_process (k=0),
#                  qn (k=1) and decoder (k=2)
# NOTE(review): presumably `qn` is the QuartzNet forward pass and the values
# are seconds -- confirm against the script that wrote the file.
pre_process = 0
qn = 0
decoder = 0
total = 0
# Iterate over the array itself rather than a hard-coded range(29), so the
# cell also works for benchmark files with a different number of batches.
for batch in a:
    stage_times = batch[1]
    total = total + batch[0]
    pre_process = pre_process + stage_times[0]
    qn = qn + stage_times[1]
    decoder = decoder + stage_times[2]

print(pre_process)
print(qn)
print(decoder)
print(total)

(29, 2)
26.63143563270569
1496.0341720581055
1.980140209197998
1524.6822412014008


In [3]:
# showInNetron is imported here, in the first cell that uses it.
from finn.util.visualization import showInNetron

# Serve end2end_quartznet_fifos.onnx for inspection; the cell output reports
# the URL at which the model is being served.
showInNetron("/workspace/results/temp/end2end_quartznet_fifos.onnx")

Serving '/workspace/results/temp/end2end_quartznet_fifos.onnx' at http://0.0.0.0:8081


In [5]:
# Serve the end2end_quartznet_bitfile.onnx model from the syn_optimized_buffer
# results; per the cell output, the previously served model is stopped first.
showInNetron("/workspace/results/syn_optimized_buffer/end2end_quartznet_bitfile.onnx")

Stopping http://0.0.0.0:8081
Serving '/workspace/results/syn_optimized_buffer/end2end_quartznet_bitfile.onnx' at http://0.0.0.0:8081


In [4]:
# Repeated import of showInNetron so that this cell can be run on its own.
from finn.util.visualization import showInNetron

# Serve the exported model (end2end_quartznet_export_dev.onnx) that the
# streamlining cell below loads as its starting point.
showInNetron("/workspace/results/syn_baseline_v2/end2end_quartznet_export_dev.onnx")

Stopping http://0.0.0.0:8081
Serving '/workspace/results/syn_baseline_v2/end2end_quartznet_export_dev.onnx' at http://0.0.0.0:8081


In [13]:
# Streamline the exported model: load it, apply the transformation sequence
# below, drop the remaining Mul nodes, rename nodes/tensors and save the
# result to /tmp/model1.onnx.
model = ModelWrapper("/workspace/results/syn_baseline_v2/end2end_quartznet_export_dev.onnx")

# Apply the 3D-to-4D tensor transformation before any streamlining step
model = model.transform(Change3DTo4DTensors())
# Absorb sign bias from export into MultiThreshold node
model = model.transform(AbsorbSignBiasIntoMultiThreshold())
# Collapse BatchNorm to Add and Mul
model = model.transform(BatchNormToAffine())
# Group multiplications
model = model.transform(MoveMulPastFork())
model = model.transform(MoveScalarMulPastConv())
model = model.transform(MoveMulPastDWConv())
# Move Mul/Add past join node
model = model.transform(MoveLinearPastEltwiseAdd())
# Collapse additions & multiplications
model = model.transform(CollapseRepeatedAdd())
model = model.transform(CollapseRepeatedMul())
# Absorb Add/Mul into multithreshold
model = model.transform(AbsorbAddIntoMultiThreshold())
model = model.transform(FactorOutMulSignMagnitude())
model = model.transform(Absorb1BitMulIntoConv())
model = model.transform(AbsorbMulIntoMultiThreshold())

# Ensure thresholds are integers
## Add quantization annotation to ensure RoundAndClipThresholds works
global_input_name = model.graph.input[0].name
model.set_tensor_datatype(global_input_name, DataType.INT8)
model = model.transform(InferDataTypes())
model = model.transform(RoundAndClipThresholds())

# Remove floating point scalar multiplication before argmax.
# Every Mul node still present at this point is bypassed: its consumer is
# rewired to read the Mul's first input directly, then the Mul is deleted.
mul_nodes = [x for x in model.graph.node if (x.op_type=="Mul")]
for n_mul in mul_nodes:
    input_mul = n_mul.input[0]
    output_mul = n_mul.output[0]
    node_after_mul = model.find_consumer(output_mul)
    if node_after_mul is None:
        # Without a consumer there is nothing to rewire; fail with a clear
        # message instead of an AttributeError on None.
        raise RuntimeError(
            "Cannot bypass Mul node '%s': its output '%s' has no consumer"
            % (n_mul.name, output_mul)
        )
    # Rewire whichever input slot(s) actually carry the Mul output instead of
    # assuming it is always input 0 of the consumer.
    for slot in range(len(node_after_mul.input)):
        if node_after_mul.input[slot] == output_mul:
            node_after_mul.input[slot] = input_mul
    model.graph.node.remove(n_mul)

# Tidy up node and tensor names after the graph surgery above
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveRandomTensorNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(GiveUniqueParameterTensors())

model.save("/tmp/model1.onnx")


In [14]:
# Inspect the streamlined model that the previous cell saved to /tmp/model1.onnx.
showInNetron("/tmp/model1.onnx")

Stopping http://0.0.0.0:8081
Serving '/tmp/model1.onnx' at http://0.0.0.0:8081


In [22]:
# Reload the streamlined model, split it into generic partitions and lower
# the first two partitions in place; the parent graph goes to /tmp/model2.onnx.
model = ModelWrapper("/tmp/model1.onnx")

# Partition id -> range of node indices passed to PartitionFromDict.
partitionings = {
    1: range(2, 75),
    2: range(75, 147),
    3: range(147, 219),
    4: range(219, 291),
    5: range(291, 363),
    6: range(363, 375),
}
model = model.transform(PartitionFromDict(partitionings))

# Transformations run, in this order, on each processed partition model:
# lowering first, then absorbing a transpose node, then reordering the
# remaining transpose nodes.
partition_steps = (
    LowerConvsToMatMul,
    AbsorbTransposeIntoMultiThreshold,
    MoveTransposePastMultiThreshold,
    MoveTransposePastJoinAdd,
    MoveTransposeBeforeFork,
)

generic_partitions = [
    node for node in model.graph.node if node.op_type == "GenericPartition"
]

# Only the first two GenericPartition nodes (in graph order) are processed.
idx = 0
for idx, n in enumerate(generic_partitions[:2], start=1):
    # The node's "model" attribute holds the path of the partition's ONNX file.
    path_to_partition = get_by_name(n.attribute, "model", "name").s.decode('utf-8')
    model_partition = ModelWrapper(path_to_partition)
    for make_step in partition_steps:
        model_partition = model_partition.transform(make_step())
    # Overwrite the partition file with its lowered version.
    model_partition.save(path_to_partition)

model.save("/tmp/model2.onnx")

In [23]:
# Inspect the partitioned parent model that the previous cell saved to /tmp/model2.onnx.
showInNetron("/tmp/model2.onnx")

Stopping http://0.0.0.0:8081
Serving '/tmp/model2.onnx' at http://0.0.0.0:8081


In [24]:
# Inspect one individual partition file (partition_2.onnx).
# NOTE(review): this path contains a user-specific, randomly suffixed temp
# directory, so it will presumably differ on a fresh run -- confirm before re-use.
showInNetron("/tmp/finn_dev_mirza/partitioning_hiptu4p7/partition_2.onnx")

Stopping http://0.0.0.0:8081
Serving '/tmp/finn_dev_mirza/partitioning_hiptu4p7/partition_2.onnx' at http://0.0.0.0:8081
