# Benchmarking TFLite models

This notebook requires the native Linux benchmark binary (`linux_x86-64_benchmark_model`), which can be downloaded from the TensorFlow Lite performance measurement page:  
[https://www.tensorflow.org/lite/performance/measurement](https://www.tensorflow.org/lite/performance/measurement)

The binary must be placed in the `benchmarking` folder (relative to the notebook's working directory) and must be executable.

**This notebook must be run under Linux.**

 TensorFlow Lite benchmark tools currently measure and calculate statistics for the following important performance metrics:

- Initialization time
- Inference time of warmup state
- Inference time of steady state
- Memory usage during initialization time
- Overall memory usage

In [29]:
import os, sys, math, datetime
import pathlib
from pathlib import Path

# import workbench.config.config
from workbench.config.config import initialize
from workbench.utils.utils import create_filepaths
from workbench.wandb import wandb_model_DB, get_model_DB_run_id_from_architecture, get_architecture_from_model_DB_run_id

import wandb

In [5]:
import re
from matplotlib import pyplot as plt
#import plotly.express as px
import pandas as pd


# enable plotly in VS Studio Code
#import plotly.io as pio
#pio.renderers.default = "notebook_connected"
#pio.renderers.default = "plotly_mimetype+notebook"

import wandb

In [6]:
# Lift pandas' display limits so that every column and the full content of each
# cell are shown. Row truncation is left at the pandas default; add
# 'display.max_rows' to the tuple below to lift that as well.
for _option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(_option, None)

In [7]:
# Initialize the workbench configuration and keep the returned models root directory.
# It is used below via `.joinpath(...)`, so it is presumably a pathlib.Path — TODO confirm.
models_dir = initialize()

In [8]:
# Run mode flag: the cell that opens the benchmark log in an editor only runs
# when this is False.
automated = False

# Name of the model to benchmark. Keep exactly one assignment active; the
# alternatives stay commented out. (The former module-level `global` statement
# was a no-op and the first assignment was immediately overwritten.)
#model_name = "efficientNetB0_1_96_c1_o3_keras"
#model_name = "mobilenetv2_0.5_96_c3_o2_l5"
model_name = "mobilenetv2_0.25_96_c3_o2_t5l512.MV1"


In [9]:

# Unpack the seven per-model paths returned by create_filepaths for `model_name`.
(
    models_path,
    models_summary_path,
    models_image_path,
    models_layer_df_path,
    models_tf_path,
    models_tflite_path,
    models_tflite_opt_path,
) = create_filepaths(model_name)

/mnt/i/tinyml/tiny_cnn/models


In [10]:
# Benchmark log location: <models_dir>/<model_name>/<model_name>_benchmark.txt
models_benchmark_path = models_dir / model_name / f"{model_name}_benchmark.txt"
models_benchmark_path

PosixPath('/mnt/i/tinyml/tiny_cnn/models/mobilenetv2_0.25_96_c3_o2_t5l512.MV1/mobilenetv2_0.25_96_c3_o2_t5l512.MV1_benchmark.txt')

In [11]:
# Show the forward-slash path of the `_opt` .tflite model that the quantized
# benchmark cell below passes to --graph.
models_tflite_opt_path.as_posix()

'/mnt/i/tinyml/tiny_cnn/models/mobilenetv2_0.25_96_c3_o2_t5l512.MV1/mobilenetv2_0.25_96_c3_o2_t5l512.MV1_INT8.tflite'

# Benchmarking the non-quantized .tflite file

In [12]:
# Disabled: benchmark of the non-quantized model ($models_tflite_path).
# Uncomment to run. Note that it tees into the same $models_benchmark_path file
# as the quantized benchmark cell, so whichever runs last overwrites the log.
# ! ./benchmarking/linux_x86-64_benchmark_model \
#     --graph=$models_tflite_path \
#     --num_threads=1 \
#     --enable_op_profiling=true \
#     | tee $models_benchmark_path

# Benchmarking the quantized .tflite file

In [16]:
! ./benchmarking/linux_x86-64_benchmark_model \
    --graph=$models_tflite_opt_path \
    --num_threads=1 \
    --enable_op_profiling=true \
    | tee $models_benchmark_path

STARTING!
Log parameter values verbosely: [0]
Num threads: [1]
Graph: [/mnt/i/tinyml/tiny_cnn/models/mobilenetv2_0.5_96_c3_o2_l5/mobilenetv2_0.5_96_c3_o2_l5_INT8.tflite]
Enable op profiling: [1]
#threads used for CPU inference: [1]
ERROR: Could not open '/mnt/i/tinyml/tiny_cnn/models/mobilenetv2_0.5_96_c3_o2_l5/mobilenetv2_0.5_96_c3_o2_l5_INT8.tflite'.
ERROR: The model allocation is null/empty
Failed to load model /mnt/i/tinyml/tiny_cnn/models/mobilenetv2_0.5_96_c3_o2_l5/mobilenetv2_0.5_96_c3_o2_l5_INT8.tflite
Benchmarking failed.


In [18]:
if automated == False:
    ! code $models_benchmark_path

# Finding the tensor arena size

In [16]:
%%capture arena_size
# Run the external find-arena-size tool and capture this cell's output in
# `arena_size` (a later cell reads it via `.stdout`). The `%%capture` magic
# must remain the very first line of the cell.
# NOTE(review): this passes $models_tflite_path, whereas the benchmark cell
# uses $models_tflite_opt_path — confirm which model the arena size should describe.
# NOTE(review): the tool location is a hard-coded absolute path; adjust per machine.
! /mnt/i/tinyml/tflite-find-arena-size/build/find-arena-size $models_tflite_path


UsageError: Line magic function `%capture` not found (But cell magic `%%capture` exists, did you mean that instead?).


In [None]:
# %%capture optimal_runtime_INT

In [24]:
# Take the captured stdout of the find-arena-size cell with surrounding whitespace removed.
# NOTE: a later cell rebinds `arena_size` to the parsed value, so this cell
# cannot be re-run afterwards without re-running the capture cell first.
arena_size_raw = arena_size.stdout.strip()
# Display the type as a quick sanity check.
type(arena_size_raw)

str

In [26]:
import ast

# Parse the tool output as a Python literal and extract the arena size.
# The output is validated explicitly so that a failed tool run (error text
# instead of a dict) produces one clear message instead of an opaque
# SyntaxError / KeyError / TypeError.
try:
    arena_size_dict = ast.literal_eval(arena_size_raw)
except (ValueError, SyntaxError) as err:
    raise ValueError(
        f"find-arena-size output is not a Python literal: {arena_size_raw!r}"
    ) from err

if not isinstance(arena_size_dict, dict) or "arena_size" not in arena_size_dict:
    raise ValueError(
        f"find-arena-size output has no 'arena_size' entry: {arena_size_raw!r}"
    )

# NOTE: this rebinds `arena_size` from the captured-output object to the parsed
# value; the name is kept because the W&B logging cell reads it.
arena_size = arena_size_dict["arena_size"]
arena_size

285664

In [30]:
# Look up the model_DB run id for this model name; the W&B cell below passes it
# to wandb.init(id=...).
# NOTE(review): the name `id` shadows the Python builtin for the rest of the
# session; renaming it requires updating the wandb.init cell as well.
id = get_model_DB_run_id_from_architecture(model_name)
id

'fgxqsgfd'

In [32]:
PROJECT = "model_DB"

# Attach to the model's W&B run (resume="allow") and record the arena size.
# The run is used as a context manager so that it is always finished, even if
# logging raises; previously an exception would have left the run open.
with wandb.init(
    project=PROJECT,  # project the run is logged to
    id=id,            # run id looked up from the model name in the previous cell
    resume="allow",
) as run:
    run.log({"arena_size": arena_size})

0,1
allocate_tensors_ms_%,0.835
allocate_tensors_ms_avg,0.08
allocate_tensors_ms_first,0.08
first_inference_us,5536.0
inference_avg_us,734.681
init_us,212645.0
initialization_ms,212.645
model_size_MB,0.28214
modify_graph_with_delegate_mem_KB,1160.0
modify_graph_with_delegate_ms_%,99.165


0,1
arena_size,▁

0,1
allocate_tensors_ms_%,0.835
allocate_tensors_ms_avg,0.08
allocate_tensors_ms_first,0.08
arena_size,285664.0
first_inference_us,5536.0
inference_avg_us,734.681
init_us,212645.0
initialization_ms,212.645
model_size_MB,0.28214
modify_graph_with_delegate_mem_KB,1160.0


In [None]:
# Disabled: write the printed result of profile_model(...) for the optimized
# model to runtime.txt by redirecting stdout.
# NOTE(review): `profile_model` is not imported in the visible cells — import it
# before re-enabling this cell.
# from contextlib import redirect_stdout

# with open("runtime.txt", "w", encoding='utf-8') as f:
#     with redirect_stdout(f):
#         print(profile_model(str(models_tflite_opt_path), accelerator=None, build=False))