# Benchmarking tflite models  

This notebook requires the native Linux benchmark binary, which you can download from this page:  
 [https://www.tensorflow.org/lite/performance/measurement](https://www.tensorflow.org/lite/performance/measurement)  

 This binary must be placed in the `benchmarking` folder next to this notebook (the cells below invoke it as `./benchmarking/...`).

 This notebook must be run under LINUX!

 TensorFlow Lite benchmark tools currently measure and calculate statistics for the following important performance metrics:

- Initialization time
- Inference time of warmup state
- Inference time of steady state
- Memory usage during initialization time
- Overall memory usage

In [1]:
import os, sys, math, datetime
import pathlib
from pathlib import Path

# import workbench.config.config
from workbench.config.config import initialize
from workbench.utils.utils import create_filepaths
from workbench.wandb import wandb_model_DB, get_model_DB_run_id_from_architecture, get_architecture_from_model_DB_run_id

import wandb

In [2]:
import re
from matplotlib import pyplot as plt
#import plotly.express as px
import pandas as pd


# enable plotly in Visual Studio Code
#import plotly.io as pio
#pio.renderers.default = "notebook_connected"
#pio.renderers.default = "plotly_mimetype+notebook"

import wandb

In [3]:
# Pandas display settings: show every column and never truncate cell contents.
pd.options.display.max_columns = None
#pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [4]:
# initialize() is imported from workbench.config.config; its return value is used
# below as the base directory that contains one folder per model.
models_dir = initialize()

In [5]:
# Notebook parameters.
# automated: when False, interactive conveniences are enabled (the benchmark log
# is opened in an editor further below).
automated = False

# Name of the model to benchmark; it is used both as the model's folder name and
# as the prefix of the files inside that folder.
# (No `global` statement is needed: names assigned at cell level are already global.)
model_name = "mobilenetv1_0.1_96_c3_o2_l5.MV1"
#model_name = "mobilenetv2_0.5_96_c3_o2_l5"
#model_name = "mobilenetv2_0.25_96_c3_o2_t5l512.MV1"


In [6]:

# Unpack the seven per-model paths returned by create_filepaths(), in order.
(
    models_path,
    models_summary_path,
    models_image_path,
    models_layer_df_path,
    models_tf_path,
    models_tflite_path,
    models_tflite_opt_path,
) = create_filepaths(model_name)

/mnt/c/tiny_mlc/tiny_cnn/models


In [7]:
# Text files inside the model's own folder: the benchmark log and the performance log.
models_benchmark_path = models_dir / model_name / f"{model_name}_benchmark.txt"
models_performance_path = models_dir / model_name / f"{model_name}_performance.txt"
# Only the last expression of a cell is displayed.
models_performance_path

PosixPath('/mnt/c/tiny_mlc/tiny_cnn/models/mobilenetv1_0.1_96_c3_o2_l5.MV1/mobilenetv1_0.1_96_c3_o2_l5.MV1_performance.txt')

In [8]:
# Show the path of the optimized model (the *_INT8.tflite file) as a POSIX-style string.
models_tflite_opt_path.as_posix()

'/mnt/c/tiny_mlc/tiny_cnn/models/mobilenetv1_0.1_96_c3_o2_l5.MV1/mobilenetv1_0.1_96_c3_o2_l5.MV1_INT8.tflite'

# Benchmarking for tflite - non quantized

In [9]:
# ! ./benchmarking/linux_x86-64_benchmark_model \
#     --graph=$models_tflite_path \
#     --num_threads=1 \
#     --enable_op_profiling=true \
#     | tee $models_benchmark_path

# Benchmarking for quantized .tflite file

# Performance for quantized tflite file

In [11]:
! ./benchmarking/linux_x86-64_benchmark_model_performance_options \
    --graph=$models_tflite_opt_path \
    --num_threads=1 \
    --enable_op_profiling=true \
    | tee $models_benchmark_path

INFO: STARTING!
INFO: The list of TFLite runtime options to be benchmarked: [all]
INFO: Log parameter values verbosely: [0]
INFO: Num threads: [1]
INFO: Graph: [/mnt/c/tiny_mlc/tiny_cnn/models/mobilenetv1_0.1_96_c3_o2_l5.MV1/mobilenetv1_0.1_96_c3_o2_l5.MV1_INT8.tflite]
INFO: Enable op profiling: [1]
INFO: #threads used for CPU inference: [1]
INFO: Use gpu: [0]
INFO: Use xnnpack: [1]
INFO: Loaded model /mnt/c/tiny_mlc/tiny_cnn/models/mobilenetv1_0.1_96_c3_o2_l5.MV1/mobilenetv1_0.1_96_c3_o2_l5.MV1_INT8.tflite
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
INFO: XNNPACK delegate created.
INFO: Explicitly applied XNNPACK delegate, and the model graph will be partially executed by the delegate w/ 2 delegate kernels.
INFO: The input model file size (MB): 0.087176
INFO: Initialized session in 242.993ms.
INFO: Running benchmark for at least 1 iterations and at least 0.5 seconds but terminate if exceeding 150 seconds.
INFO: count=941 first=7052 curr=319 min=315 max=7676 avg=527.662 std

In [13]:
# Run the stock benchmark binary on the quantized model (single thread, op
# profiling enabled); `tee` shows the output here and also writes it to
# models_benchmark_path.
! ./benchmarking/linux_x86-64_benchmark_model \
    --graph=$models_tflite_opt_path \
    --num_threads=1 \
    --enable_op_profiling=true \
    | tee $models_benchmark_path

STARTING!
Log parameter values verbosely: [0]
Num threads: [1]
Graph: [/mnt/c/tiny_mlc/tiny_cnn/models/mobilenetv2_0.25_96_c3_o2_t5l512.MV1/mobilenetv2_0.25_96_c3_o2_t5l512.MV1_INT8.tflite]
Enable op profiling: [1]
#threads used for CPU inference: [1]
Loaded model /mnt/c/tiny_mlc/tiny_cnn/models/mobilenetv2_0.25_96_c3_o2_t5l512.MV1/mobilenetv2_0.25_96_c3_o2_t5l512.MV1_INT8.tflite
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
The input model file size (MB): 0.282144
Initialized session in 120.179ms.
Running benchmark for at least 1 iterations and at least 0.5 seconds but terminate if exceeding 150 seconds.
count=736 first=3866 curr=782 min=435 max=4658 avg=676.192 std=332

Running benchmark for at least 50 iterations and at least 1 seconds but terminate if exceeding 150 seconds.
count=1750 first=851 curr=652 min=452 max=1421 avg=536.384 std=133

Inference timings in us: Init: 120179, First inference: 3866, Warmup (avg): 676.192, Inference (avg): 536.384
Note: as the benchmark 

In [10]:
if automated == False:
    ! code $models_benchmark_path

# Finding the tensor arena size

In [11]:
# Location of the find-arena-size executable: a sibling checkout of
# "tflite-find-arena-size" next to the current working directory.
arena_size_path = Path.cwd().parent / "tflite-find-arena-size" / "build" / "find-arena-size"
arena_size_path

PosixPath('/mnt/c/tiny_mlc/tflite-find-arena-size/build/find-arena-size')

In [15]:
%%capture arena_size
# Run the find-arena-size tool on the model file; %%capture stores everything
# the cell prints in `arena_size` (its .stdout is read in a later cell).
# NOTE(review): this passes models_tflite_path, while the benchmark cells use
# models_tflite_opt_path -- confirm which model the arena size should be measured for.
! $arena_size_path $models_tflite_path


In [16]:
# %%capture optimal_runtime_INT

In [17]:
# Text printed by the find-arena-size run, with surrounding whitespace removed.
arena_size_raw = arena_size.stdout.strip()
arena_size_raw

'edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize.cc:68 input->type == kTfLiteFloat32 || input->type == kTfLiteInt16 || input->type == kTfLiteInt8 was not true.\r\r\nNode QUANTIZE (number 0f) failed to prepare with status 1'

In [13]:
import ast

# Parse the tool output as a Python literal and extract its "arena_size" entry.
# NOTE: this rebinds `arena_size` from the %%capture result to a number, so the
# capture cell has to be re-run before the cell that reads `arena_size.stdout`
# can be executed again.
try:
    arena_size_dict = ast.literal_eval(arena_size_raw)
    arena_size = arena_size_dict["arena_size"]
except Exception as err:
    # Best effort: when the tool printed an error message instead of a dict,
    # fall back to 0 but report why, instead of silently hiding the failure.
    # (KeyboardInterrupt / SystemExit are intentionally no longer swallowed.)
    print(f"Could not parse arena size ({type(err).__name__}: {err}); using 0")
    arena_size = 0

In [20]:
# Look up the run id that belongs to this model name; it is passed to
# wandb.init() as `id` further below.
run_id = get_model_DB_run_id_from_architecture(model_name)
run_id

'h2ee651d'

In [21]:
# Sanity check: inspect the type of the returned run id.
type(run_id)

str

In [22]:
# Attach the measured arena size to the W&B run of this model.
# A missing (None/empty) or one-character run id is treated as "not found".
if run_id and len(run_id) > 1:

    PROJECT = "model_DB"

    # resume="allow" resumes the run with this id if it exists (otherwise W&B
    # starts a new run under that id). Using the run as a context manager
    # finishes it even when logging raises.
    with wandb.init(
        # Set the project where this run will be logged
        project=PROJECT,
        id=run_id,
        resume="allow",
    ) as run:
        run.log({"arena_size": arena_size})

else:
    print(f"Could not find run_id {run_id}!")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33msusbrock[0m. Use [1m`wandb login --relogin`[0m to force relogin


0,1
arena_size,▁

0,1
allocate_tensors_ms_%,0.491
allocate_tensors_ms_avg,0.033
allocate_tensors_ms_first,0.033
arena_size,285664.0
first_inference_us,3787.0
inference_avg_us,486.023
init_us,133940.0
initialization_ms,133.94
model_size_MB,0.28214
modify_graph_with_delegate_mem_KB,1164.0


In [23]:
# from contextlib import redirect_stdout

# with open("runtime.txt", "w", encoding='utf-8') as f:
#     with redirect_stdout(f):
#         print(profile_model(str(models_tflite_opt_path), accelerator=None, build=False))