In [1]:
import csv
import finn
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
from finn.transformation.fpgadataflow.create_dataflow_partition import CreateDataflowPartition
from qonnx.transformation.change_3d_tensors_to_4d import Change3DTo4DTensors
from qonnx.transformation.general import RemoveUnusedTensors, GiveUniqueNodeNames
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from finn.transformation.streamline import Streamline
from finn.util.visualization import showSrc
from finn.util.basic import make_build_dir
import json
import os
import onnx
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.util.cleanup import cleanup
import shutil

# Directory that holds the .onnx models; the 'verif' inputs and the
# 'output_estimates' results both live underneath it.
file_path = '../workspace/jh_fpga_amr/src/py/models'

# CSV columns, grouped by the estimate report each value is read from.
# The header row and every data row are generated from these same lists so
# they cannot drift apart.
resource_keys = ['BRAM_18K', 'LUT', 'URAM', 'DSP']
performance_keys = ['critical_path_cycles', 'max_cycles', 'estimated_throughput_fps', 'estimated_latency_ns']
layer_cycle_keys = ['FMPadding_rtl_0', 'ConvolutionInputGenerator_rtl_0', 'MVAU_rtl_0']


def _load_report(output_dir, report_name):
    """Return the parsed JSON report `report_name` from `output_dir`/report."""
    report_path = os.path.join(output_dir, "report", report_name)
    with open(report_path, 'r') as report_file:
        return json.load(report_file)


with open('est_all_output.csv', mode='w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['File Name'] + resource_keys + performance_keys + layer_cycle_keys)

    ## Iterate through each .onnx model, run estimates, and record metrics.
    # os.listdir returns entries in arbitrary order; sort so the CSV rows come
    # out in the same order on every run.
    for file_name in sorted(os.listdir(file_path)):
        if not file_name.endswith('.onnx'):
            continue

        # **** Running this after synth, so onnx files do not need cleanup. ****
        # The manual preprocessing used previously (QONNX cleanup,
        # ConvertQONNXtoFINN, Streamline, Change3DTo4DTensors,
        # LowerConvsToMatMul, GiveUniqueNodeNames, RemoveUnusedTensors,
        # CreateDataflowPartition) is therefore skipped and the model is
        # taken from the 'verif' directory instead.
        verif_model_path = os.path.join(file_path, 'verif', file_name)

        ## Estimate-only build configuration. All pulled from 3-build-accelerator-with-finn.
        estimates_output_dir = os.path.join(file_path, 'output_estimates', file_name)

        # Delete previous run results if they exist.
        if os.path.exists(estimates_output_dir):
            shutil.rmtree(estimates_output_dir)
            print("Previous run results deleted!")

        cfg_estimates = build.DataflowBuildConfig(
            output_dir          = estimates_output_dir,
            mvau_wwidth_max     = 80,
            target_fps          = 1000000,
            synth_clk_period_ns = 10.0,
            fpga_part           = "xc7z020clg400-1",
            steps               = build_cfg.estimate_only_dataflow_steps,
            generate_outputs=[
                build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
            ]
        )

        ## Run estimate-only build, faster than synthesis.
        build.build_dataflow_cfg(verif_model_path, cfg_estimates)

        ## Check hardware utilization estimates.
        # Layer-by-layer estimation of resources.
        data_util = _load_report(estimates_output_dir, "estimate_layer_resources.json")
        # NOTE(review): FINN's estimate step is expected to store the
        # network-wide sums under a "total" entry next to the per-layer
        # entries, so the resource columns must be read from there. Fall back
        # to the top level in case a report has no such entry.
        resource_totals = data_util.get('total', data_util)

        ## Check network performance estimates. Tends to over-estimate since it cannot capture the effects of various synth optimizations.
        data_est = _load_report(estimates_output_dir, "estimate_network_performance.json")

        ## Check layer cycle estimates.
        # Estimated number of clk cycles each layer will take, all layers are running in parallel so slowest layer will determine
        # overall throughput. FINN tries to parallelize layers so they all take a similar number of cycles while meeting timing.
        # Summing up all layer cycle estimates gives an estimate for overall latency of entire network.
        data_cyc = _load_report(estimates_output_dir, "estimate_layer_cycles.json")

        # Export metrics to CSV; a metric missing from a report becomes an empty cell.
        writer.writerow(
            [file_name]
            + [resource_totals.get(key, '') for key in resource_keys]
            + [data_est.get(key, '') for key in performance_keys]
            + [data_cyc.get(key, '') for key in layer_cycle_keys]
        )
        # Flush per model so rows already gathered reach disk even if a later
        # build in the sweep fails.
        csv_file.flush()

Building dataflow accelerator from ../workspace/jh_fpga_amr/src/py/models/verif/vgglike_4f_4c_3re_3mp_pr0.1_quant8.onnx
Intermediate outputs will be generated in /tmp/finn_dev_rothej
Final outputs will be generated in ../workspace/jh_fpga_amr/src/py/models/output_estimates/vgglike_4f_4c_3re_3mp_pr0.1_quant8.onnx
Build log is at ../workspace/jh_fpga_amr/src/py/models/output_estimates/vgglike_4f_4c_3re_3mp_pr0.1_quant8.onnx/build_dataflow.log
Running step: step_qonnx_to_finn [1/10]
Running step: step_tidy_up [2/10]
Running step: step_streamline [3/10]
Running step: step_convert_to_hw [4/10]
Running step: step_create_dataflow_partition [5/10]
Running step: step_specialize_layers [6/10]
Running step: step_target_fps_parallelization [7/10]
Running step: step_apply_folding_config [8/10]
Running step: step_minimize_bit_width [9/10]
Running step: step_generate_estimate_reports [10/10]
Completed successfully
Building dataflow accelerator from ../workspace/jh_fpga_amr/src/py/models/verif/vgglike