In [1]:
import csv
import finn
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
from finn.transformation.fpgadataflow.create_dataflow_partition import CreateDataflowPartition
from qonnx.transformation.change_3d_tensors_to_4d import Change3DTo4DTensors
from qonnx.transformation.general import RemoveUnusedTensors, GiveUniqueNodeNames
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from finn.transformation.streamline import Streamline
from finn.util.visualization import showSrc
from finn.util.basic import make_build_dir
import json
import os
import onnx
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.util.cleanup import cleanup
import shutil

# Directory holding the exported .onnx models. All derived artifacts
# (cleanup/, verif/, output_rtl/) are written to subdirectories of it.
file_path = '../workspace/jh_fpga_amr/src/py/models'

# Keys read from report/ooc_synth_and_timing.json, in CSV column order.
# LUT - number of LUTs used.
# LUTRAM - number of LUTs configured as RAM.
# FF - number of FFs used.
# DSP - number of DSP blocks used. Synth usually tries to conserve these when not needed because they are valuable.
# BRAM - total block RAM tiles used.
# Carry - carry chains used, used for arith. operations.
# WNS - worst negative slack, positive value means timing is met.
SYNTH_KEYS = [
    'LUT', 'LUTRAM', 'FF', 'DSP', 'BRAM', 'BRAM_18K', 'BRAM_36K', 'URAM',
    'Carry', 'WNS', 'Delay', 'vivado_version', 'vivado_build_no',
    'fmax_mhz', 'estimated_throughput_fps',
]

# Keys read from report/rtlsim_performance.json, in CSV column order.
# N_IN_TXNS - number of input transactions.
# N_OUT_TXNS - number of output transactions.
# cycles - total number of clk cycles for the process.
# N - number of operations (batch size) handled in a single cycle.
# latency_cycles - total cycle count.
PERF_KEYS = [
    'N_IN_TXNS', 'N_OUT_TXNS', 'cycles', 'N', 'latency_cycles',
    'runtime[ms]', 'throughput[images/s]', 'fclk[mhz]',
    'stable_throughput[images/s]',
]


def _load_report(report_path):
    """Return the parsed JSON report at *report_path*, or {} if it is missing.

    A missing report leaves the corresponding CSV columns blank instead of
    aborting the whole sweep over the remaining models.
    """
    try:
        with open(report_path, 'r') as report_file:
            return json.load(report_file)
    except FileNotFoundError:
        print(f"Warning: report not found, columns left blank: {report_path}")
        return {}


# The CSV handle gets its own name (csv_file) so that the report handles
# opened further down never rebind it.
with open('build_all_output.csv', mode='w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    # Header and data rows are both derived from the key lists above so the
    # columns cannot drift out of sync.
    writer.writerow(['File Name'] + SYNTH_KEYS + PERF_KEYS)

    ## Iterate through each .onnx model, process, and synthesize. Record metrics.
    # sorted() makes the processing (and CSV row) order reproducible;
    # os.listdir() returns entries in arbitrary order.
    for file_name in sorted(os.listdir(file_path)):
        if not file_name.endswith('.onnx'):
            continue

        full_path = os.path.join(file_path, file_name)

        ## Run QONNX cleanup.
        cleanup_path = os.path.join(file_path, 'cleanup', file_name)
        os.makedirs(os.path.dirname(cleanup_path), exist_ok=True)
        cleanup(full_path, out_file=cleanup_path)

        ## Load model using ModelWrapper and convert to FINN format from QONNX.
        model = ModelWrapper(cleanup_path)
        model = model.transform(ConvertQONNXtoFINN())

        ## Apply necessary transformations.
        model = model.transform(Streamline()) # Only single model path supported.
        model = model.transform(Change3DTo4DTensors()) # Necessary, FINN doesn't like 1d.
        model = model.transform(LowerConvsToMatMul()) # Also necessary for build, need the 4D conversion first.
        model = model.transform(GiveUniqueNodeNames())
        model = model.transform(RemoveUnusedTensors())
        model = model.transform(CreateDataflowPartition())

        verif_model_path = os.path.join(file_path, 'verif', file_name)
        os.makedirs(os.path.dirname(verif_model_path), exist_ok=True)
        model.save(verif_model_path)

        ## Runs a synth build to view rtlsim performance. All pulled from 3-build-accelerator-with-finn.
        rtlsim_output_dir = os.path.join(file_path, 'output_rtl', file_name)

        # Delete previous run results if they exist, so the reports read
        # below can only come from this run.
        if os.path.exists(rtlsim_output_dir):
            shutil.rmtree(rtlsim_output_dir)
            print("Previous run results deleted!")

        cfg_stitched_ip = build.DataflowBuildConfig(
            output_dir          = rtlsim_output_dir,
            mvau_wwidth_max     = 80,
            target_fps          = 1000000,
            synth_clk_period_ns = 10.0,
            fpga_part           = "xc7z020clg400-1",
            generate_outputs=[
                build_cfg.DataflowOutputType.STITCHED_IP,
                build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,
                build_cfg.DataflowOutputType.OOC_SYNTH,
            ]
        )

        ## Synthesize model. Note: This will take ~ 10 mins to complete, uses Vivado for build.
        build.build_dataflow_cfg(verif_model_path, cfg_stitched_ip)

        ## Check hardware utilization estimates post-synthesis.
        json_file_path = os.path.join(rtlsim_output_dir, "report", "ooc_synth_and_timing.json")
        data = _load_report(json_file_path)

        ## Check performance estimates post-synthesis.
        perf_json_path = os.path.join(rtlsim_output_dir, "report", "rtlsim_performance.json")
        perf_data = _load_report(perf_json_path)

        # Export metrics to CSV; keys absent from a report become empty cells.
        writer.writerow(
            [file_name]
            + [data.get(key, '') for key in SYNTH_KEYS]
            + [perf_data.get(key, '') for key in PERF_KEYS]
        )
        # Each build takes minutes; flush so rows for finished models are
        # on disk even if a later build raises.
        csv_file.flush()



Previous run results deleted!
Building dataflow accelerator from ../workspace/jh_fpga_amr/src/py/models/verif/vgglike_4f_4c_3re_3mp_pr0.1_quant8.onnx
Intermediate outputs will be generated in /tmp/finn_dev_rothej
Final outputs will be generated in ../workspace/jh_fpga_amr/src/py/models/output_rtl/vgglike_4f_4c_3re_3mp_pr0.1_quant8.onnx
Build log is at ../workspace/jh_fpga_amr/src/py/models/output_rtl/vgglike_4f_4c_3re_3mp_pr0.1_quant8.onnx/build_dataflow.log
Running step: step_qonnx_to_finn [1/19]
Running step: step_tidy_up [2/19]
Running step: step_streamline [3/19]
Running step: step_convert_to_hw [4/19]
Running step: step_create_dataflow_partition [5/19]
Running step: step_specialize_layers [6/19]
Running step: step_target_fps_parallelization [7/19]
Running step: step_apply_folding_config [8/19]
Running step: step_minimize_bit_width [9/19]
Running step: step_generate_estimate_reports [10/19]
Running step: step_hw_codegen [11/19]
Running step: step_hw_ipgen [12/19]
Running step: step

ValueError: I/O operation on closed file.