In [None]:
# Launch a Build: Only Estimate Reports 
# For Avnet Ultra96-v2 Board

In [1]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
import os
import shutil

# Input model and the folder that receives the estimate-only results.
model_file = "finn_lenet_verification.onnx"
estimates_output_dir = "output_estimates_only"

# Start clean: remove whatever an earlier run left in the output folder.
if os.path.exists(estimates_output_dir):
    shutil.rmtree(estimates_output_dir)
    print("Previous run results deleted!")

# Estimate-only flow: restrict the builder to the estimate steps and request
# only the estimate reports as output.
cfg_estimates = build.DataflowBuildConfig(
    output_dir=estimates_output_dir,
    steps=build_cfg.estimate_only_dataflow_steps,
    generate_outputs=[build_cfg.DataflowOutputType.ESTIMATE_REPORTS],
    # FPGA part string (the notebook header names the Avnet Ultra96-v2 board).
    fpga_part="xczu3eg-sbva484-1-i",
    # A 10 ns clock period corresponds to a 100 MHz clock.
    synth_clk_period_ns=10.0,
    # Tuning knobs for the build; values kept exactly as chosen by the author.
    target_fps=1000000,
    mvau_wwidth_max=80,
)

In [2]:
%%time
# Run the estimate-only build with the configuration defined above.
# The call's return value is shown as the cell result; the recorded run
# printed "Completed successfully" and returned 0.
build.build_dataflow_cfg(model_file, cfg_estimates)

Building dataflow accelerator from finn_lenet_verification.onnx
Intermediate outputs will be generated in /home/rstar900/finn/my_builds
Final outputs will be generated in output_estimates_only
Build log is at output_estimates_only/build_dataflow.log
Running step: step_qonnx_to_finn [1/8]
Running step: step_tidy_up [2/8]
Running step: step_streamline [3/8]
Running step: step_convert_to_hls [4/8]
Running step: step_create_dataflow_partition [5/8]
Running step: step_target_fps_parallelization [6/8]
Running step: step_apply_folding_config [7/8]
Running step: step_generate_estimate_reports [8/8]
Completed successfully
CPU times: user 1.17 s, sys: 0 ns, total: 1.17 s
Wall time: 1.18 s


0

In [3]:
# We'll now examine the outputs generated by this build.
# Under the output directory, we'll find a subfolder containing the generated estimate reports.

In [4]:
# List the top-level contents of the build output folder.
! ls {estimates_output_dir}

auto_folding_config.json  intermediate_models  time_per_step.json
build_dataflow.log	  report


In [5]:
# List the report files inside the output folder's "report" subfolder.
! ls {estimates_output_dir}/report

estimate_layer_config_alternatives.json  estimate_network_performance.json
estimate_layer_cycles.json		 op_and_param_counts.json
estimate_layer_resources.json


In [6]:
# Let's start by examining the contents of estimate_network_performance.json.
# Here, we can see the analytical estimates for the performance and latency.
# NOTE: in the recorded run the estimated throughput (~5061 fps) is far below
# the configured target_fps of 1000000.
! cat {estimates_output_dir}/report/estimate_network_performance.json

{
  "critical_path_cycles": 89317,
  "max_cycles": 19760,
  "max_cycles_node_name": "ConvolutionInputGenerator_0",
  "estimated_throughput_fps": 5060.728744939272,
  "estimated_latency_ns": 893170.0
}

In [7]:
# The estimate_layer_resources.json report lists the layer-by-layer resource estimates.
# Use it to check whether the layers will fit on the FPGA; if the estimates are too high, consider lowering target_fps.
import json
def read_json_dict(filename):
    """Load a JSON file and return its decoded content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file content
        (a dict for the report files read in this notebook).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid UTF-8 or not valid JSON
            (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both
            subclasses of ``ValueError``).
    """
    # Decode explicitly as UTF-8 rather than with the platform's locale
    # default, so the result does not depend on the machine's settings.
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)

# Load the per-layer resource estimates; as the last expression of the cell,
# the returned dict is rendered as the cell output.
read_json_dict(estimates_output_dir + "/report/estimate_layer_resources.json")

{'ConvolutionInputGenerator_0': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 372,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'MatrixVectorActivation_0': {'BRAM_18K': 3,
  'BRAM_efficiency': 0.0244140625,
  'LUT': 1732,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'StreamingMaxPool_Batch_0': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 0,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'ConvolutionInputGenerator_1': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 348,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'MatrixVectorActivation_1': {'BRAM_18K': 2,
  'BRAM_efficiency': 0.1953125,
  'LUT': 1381,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'StreamingMaxPool_Batch_1': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 0,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'MatrixVectorActivation_2': {'BRAM_18K': 12,
  'BRAM_efficiency': 0.6510416666666666,
  'LUT': 1255,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'MatrixVectorActivation_3'

In [8]:
# Have a look at estimate_layer_cycles.json; in the recorded output it maps
# each layer name to an integer cycle estimate.
read_json_dict(estimates_output_dir + "/report/estimate_layer_cycles.json")

{'ConvolutionInputGenerator_0': 19760,
 'MatrixVectorActivation_0': 14112,
 'StreamingMaxPool_Batch_0': 980,
 'ConvolutionInputGenerator_1': 15420,
 'MatrixVectorActivation_1': 16000,
 'StreamingMaxPool_Batch_1': 125,
 'MatrixVectorActivation_2': 12000,
 'MatrixVectorActivation_3': 10080,
 'MatrixVectorActivation_4': 840}

In [9]:
# Launch a Build: Stitched IP, out-of-context synth and rtlsim Performance

In [23]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
import os
import shutil

# Same input model as the estimate-only build; this flow writes to its own folder.
model_file = "finn_lenet_verification.onnx"
rtlsim_output_dir = "output_ipstitch_ooc_rtlsim"

# Start clean: remove whatever an earlier run left in the output folder.
if os.path.exists(rtlsim_output_dir):
    shutil.rmtree(rtlsim_output_dir)
    print("Previous run results deleted!")

# No explicit `steps` list is passed here (the recorded run shows 17 steps);
# the requested outputs are the stitched IP, the rtlsim performance numbers
# and the out-of-context synthesis results.
cfg_stitched_ip = build.DataflowBuildConfig(
    output_dir=rtlsim_output_dir,
    generate_outputs=[
        build_cfg.DataflowOutputType.STITCHED_IP,
        build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,
        build_cfg.DataflowOutputType.OOC_SYNTH,
    ],
    # FPGA part string (the notebook header names the Avnet Ultra96-v2 board).
    fpga_part="xczu3eg-sbva484-1-i",
    # A 10 ns clock period corresponds to a 100 MHz clock.
    synth_clk_period_ns=10.0,
    # Tuning knobs for the build; values kept exactly as chosen by the author.
    target_fps=1000000,
    mvau_wwidth_max=80,
)

Previous run results deleted!


In [24]:
%%time
# Run the stitched-IP / OOC-synth / rtlsim build with the configuration above.
# NOTE(review): the recorded run failed during step_hls_codegen with an
# AttributeError raised from numpy's module-level __getattr__, was interrupted
# by the user, printed "Build failed" and returned -1. Presumably the installed
# numpy version lacks an attribute the code generation relies on; confirm the
# environment before re-running.
build.build_dataflow_cfg(model_file, cfg_stitched_ip)

Building dataflow accelerator from finn_lenet_verification.onnx
Intermediate outputs will be generated in /home/rstar900/finn/my_builds
Final outputs will be generated in output_ipstitch_ooc_rtlsim
Build log is at output_ipstitch_ooc_rtlsim/build_dataflow.log
Running step: step_qonnx_to_finn [1/17]
Running step: step_tidy_up [2/17]
Running step: step_streamline [3/17]


Traceback (most recent call last):
  File "/home/rstar900/finn/src/finn/builder/build_dataflow.py", line 166, in build_dataflow_cfg
    model = transform_step(model, cfg)
  File "/home/rstar900/finn/src/finn/builder/build_dataflow_steps.py", line 426, in step_hls_codegen
    model = model.transform(
  File "/home/rstar900/finn/deps/qonnx/src/qonnx/core/modelwrapper.py", line 140, in transform
    (transformed_model, model_was_changed) = transformation.apply(transformed_model)
  File "/home/rstar900/finn/src/finn/transformation/fpgadataflow/prepare_ip.py", line 88, in apply
    _codegen_single_node(node, model, self.fpgapart, self.clk)
  File "/home/rstar900/finn/src/finn/transformation/fpgadataflow/prepare_ip.py", line 55, in _codegen_single_node
    inst.code_generation_ipgen(model, fpgapart, clk)
  File "/home/rstar900/finn/src/finn/custom_op/fpgadataflow/hlscustomop.py", line 271, in code_generation_ipgen
    self.generate_params(model, path)
  File "/home/rstar900/finn/src/finn/cus

Running step: step_convert_to_hls [4/17]
Running step: step_create_dataflow_partition [5/17]
Running step: step_target_fps_parallelization [6/17]
Running step: step_apply_folding_config [7/17]
Running step: step_generate_estimate_reports [8/17]
Running step: step_hls_codegen [9/17]
> [0;32m/opt/conda/lib/python3.8/site-packages/numpy/__init__.py[0m(284)[0;36m__getattr__[0;34m()[0m
[0;32m    282 [0;31m            [0;32mreturn[0m [0mTester[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    283 [0;31m[0;34m[0m[0m
[0m[0;32m--> 284 [0;31m        raise AttributeError("module {!r} has no attribute "
[0m[0;32m    285 [0;31m                             "{!r}".format(__name__, attr))
[0m[0;32m    286 [0;31m[0;34m[0m[0m
[0m
--KeyboardInterrupt--

KeyboardInterrupt: Interrupted by user
Build failed
CPU times: user 1.16 s, sys: 12.3 ms, total: 1.17 s
Wall time: 8.47 s


-1