# Benchmarking tflite models  

This notebook requires the native Linux benchmark binary (`linux_x86-64_benchmark_model`), which can be downloaded from:  
 [https://www.tensorflow.org/lite/performance/measurement](https://www.tensorflow.org/lite/performance/measurement)  

 The binary must be placed in the `benchmarking` folder (relative to the notebook's working directory).

 This notebook must be run under Linux, because the benchmark cell invokes a Linux executable.

 TensorFlow Lite benchmark tools currently measure and calculate statistics for the following important performance metrics:

- Initialization time
- Inference time of warmup state
- Inference time of steady state
- Memory usage during initialization time
- Overall memory usage

In [1]:
import os, sys, math, datetime
import pathlib
from pathlib import Path

# import workbench.config.config
from workbench.config.config import initialize
from workbench.utils.utils import create_filepaths

In [28]:
import re
from matplotlib import pyplot as plt
import plotly.express as px
import pandas as pd


# Enable Plotly figure rendering in VS Code
import plotly.io as pio
#pio.renderers.default = "notebook_connected"
pio.renderers.default = "plotly_mimetype+notebook"

import wandb

In [96]:
# Configure pandas to show every column and never truncate cell contents.
# Row truncation keeps the pandas default (the max_rows line stays disabled).
for option_name in ("display.max_columns", "display.max_colwidth"):
    pd.set_option(option_name, None)
#pd.set_option('display.max_rows', None)

In [97]:
# initialize() is a project helper (workbench.config.config). Its return value
# is used below as the base directory of all models (joinpath is called on it).
models_dir = initialize()

In [98]:
# Model whose benchmark output is analysed in this notebook.
# Other model names that have been used here:
#   "efficientNetB0_1_96_c1_o3_keras"
#   "mobilenetv2_0.1_96_c1_o3_l1"
# (The former `global model_name` statement was a no-op at notebook top level
# and has been removed.)
model_name = "shufflenetv1_1_96_c3_o3_000"

# create_filepaths() is a project helper that returns the per-model file paths.
(
    models_path,
    models_summary_path,
    models_image_path,
    models_layer_df_path,
    models_tf_path,
    models_tflite_path,
    models_tflite_opt_path,
) = create_filepaths(model_name)

i:\tinyml\tiny_cnn\models


In [5]:
# Text file that receives the benchmark tool's console output:
# <models_dir>/<model_name>/<model_name>_benchmark.txt
models_benchmark_path = models_dir / model_name / f"{model_name}_benchmark.txt"
models_benchmark_path

WindowsPath('i:/tinyml/tiny_cnn/models/shufflenetv1_1_96_c3_o3_000/shufflenetv1_1_96_c3_o3_000_benchmark.txt')

In [6]:
# Display the .tflite model path using forward slashes.
models_tflite_path.as_posix()

'i:/tinyml/tiny_cnn/models/shufflenetv1_1_96_c3_o3_000/shufflenetv1_1_96_c3_o3_000.tflite'

In [7]:
! ./benchmarking/linux_x86-64_benchmark_model \
    --graph=$models_tflite_path \
    --num_threads=1 \
    --enable_op_profiling=true \
    | tee $models_benchmark_path

Der Befehl "." ist entweder falsch geschrieben oder
konnte nicht gefunden werden.


# Helper functions

In [99]:

def clean_model_summary(filepath):
    """Read a text file and return its non-blank lines, stripped.

    Args:
        filepath: path of the text file to read (decoded as latin-1).

    Returns:
        list(str): every line in file order with leading/trailing whitespace
        removed; lines that are empty after stripping are dropped.
    """
    with open(filepath, "r", encoding="latin-1") as handle:
        stripped_lines = [raw_line.strip() for raw_line in handle]
    return [entry for entry in stripped_lines if entry]

In [100]:
def clean_column_names(df):
    """Return the column names of ``df`` without padding and square brackets.

    Each name is stripped of surrounding whitespace first; afterwards every
    ``[`` and ``]`` character is removed, e.g. ``"  [avg ms]"`` -> ``"avg ms"``.

    Args:
        df: object with a ``columns`` attribute that iterates over strings
            (a pandas DataFrame in this notebook).

    Returns:
        list(str): the cleaned names in the original column order. ``df``
        itself is not modified.
    """
    return [
        name.strip().replace("[", "").replace("]", "")
        for name in df.columns
    ]

In [102]:
def string_percent_to_float(x):
    """Convert a percentage such as ``"  97.532%"`` to the float ``97.532``.

    Args:
        x: value whose string form is a number, optionally surrounded by
            whitespace and/or percent signs.

    Returns:
        float: the numeric value (not divided by 100).

    Raises:
        ValueError: if the remaining text is not a valid number.
    """
    # Strip whitespace first: with a trailing blank ("97.5% ") the percent
    # sign is not the last character, so stripping "%" alone would leave it
    # in place and float() would fail.
    return float(str(x).strip().strip("%"))

In [103]:
def remove_tabs(text):
    """Split every string in ``text`` at its tab characters.

    Args:
        text (list(str)): strings whose fields are separated by tabs

    Returns:
        list(list(str)): one list of fields per input string, in input order;
        a string without tabs becomes a one-element list
    """
    return [entry.split("\t") for entry in text]

# Parsing the benchmark output file

In [104]:
# Load the saved benchmark output as a list of stripped, non-empty lines.
lines = clean_model_summary(models_benchmark_path)

In [105]:
# Inspect the cleaned log lines (the complete list is displayed).
lines

['STARTING!',
 'Log parameter values verbosely: [0]',
 'Num threads: [1]',
 'Graph: [/mnt/i/tinyml/tiny_cnn/models/shufflenetv1_1_96_c3_o3_000/shufflenetv1_1_96_c3_o3_000.tflite]',
 'Enable op profiling: [1]',
 '#threads used for CPU inference: [1]',
 'Loaded model /mnt/i/tinyml/tiny_cnn/models/shufflenetv1_1_96_c3_o3_000/shufflenetv1_1_96_c3_o3_000.tflite',
 'The input model file size (MB): 3.72216',
 'Initialized session in 236.6ms.',
 'Running benchmark for at least 1 iterations and at least 0.5 seconds but terminate if exceeding 150 seconds.',
 'count=20 first=85850 curr=22003 min=20402 max=85850 avg=25043.7 std=14034',
 'Running benchmark for at least 50 iterations and at least 1 seconds but terminate if exceeding 150 seconds.',
 'count=50 first=24295 curr=21011 min=20370 max=33853 avg=21881.2 std=2184',
 'Inference timings in us: Init: 236600, First inference: 85850, Warmup (avg): 25043.7, Inference (avg): 21881.2',
 'Note: as the benchmark tool itself affects memory footprint, t

In [106]:
# Find the line that introduces the per-operator profiling section of the
# benchmark log; everything before it is the general, model-level report.
PROFILING_MARKER = "Operator-wise Profiling Info for Regular Benchmark Runs:"
marker_positions = [i for i, line in enumerate(lines) if PROFILING_MARKER in line]
if not marker_positions:
    # Without this guard `split_line` would stay undefined (or keep a stale
    # value from an earlier execution) and later cells would fail confusingly.
    raise ValueError(
        f"Marker {PROFILING_MARKER!r} not found in the benchmark output; "
        "was the benchmark run with --enable_op_profiling=true?"
    )
# The previous loop overwrote the index on every hit, so the last match wins.
split_line = marker_positions[-1]

In [107]:
# Show the marker line that separates the general report from the operator
# profiling section. (This cell used to index with `lines_dict`, a name that
# is never defined in this notebook and raises NameError on a fresh kernel.)
lines[split_line]



In [108]:
# General benchmark report: all lines before the operator-profiling marker.
model_profiling = lines[:split_line]
model_profiling

['STARTING!',
 'Log parameter values verbosely: [0]',
 'Num threads: [1]',
 'Graph: [/mnt/i/tinyml/tiny_cnn/models/shufflenetv1_1_96_c3_o3_000/shufflenetv1_1_96_c3_o3_000.tflite]',
 'Enable op profiling: [1]',
 '#threads used for CPU inference: [1]',
 'Loaded model /mnt/i/tinyml/tiny_cnn/models/shufflenetv1_1_96_c3_o3_000/shufflenetv1_1_96_c3_o3_000.tflite',
 'The input model file size (MB): 3.72216',
 'Initialized session in 236.6ms.',
 'Running benchmark for at least 1 iterations and at least 0.5 seconds but terminate if exceeding 150 seconds.',
 'count=20 first=85850 curr=22003 min=20402 max=85850 avg=25043.7 std=14034',
 'Running benchmark for at least 50 iterations and at least 1 seconds but terminate if exceeding 150 seconds.',
 'count=50 first=24295 curr=21011 min=20370 max=33853 avg=21881.2 std=2184',
 'Inference timings in us: Init: 236600, First inference: 85850, Warmup (avg): 25043.7, Inference (avg): 21881.2',
 'Note: as the benchmark tool itself affects memory footprint, t

# Operator profiling

In [109]:
# Per-operator report: all lines after the marker line (the marker itself is skipped).
operator_profiling = lines[split_line+1:]
operator_profiling

 '[node type]\t  [first]\t [avg ms]\t     [%]\t  [cdf%]\t  [mem KB]\t[times called]\t[Name]',
 'CONV_2D\t    0.144\t    0.148\t  0.680%\t  0.680%\t     0.000\t        1\t[shufflenetv1/re_lu_33/Relu;shufflenetv1/batch_normalization_49/FusedBatchNormV3;shufflenetv1/batch_normalization_49/FusedBatchNormV3/ReadVariableOp/resource;shufflenetv1/conv2d_33/BiasAdd;shufflenetv1/conv2d_33/Conv2D]:0',
 'MAX_POOL_2D\t    0.063\t    0.065\t  0.301%\t  0.981%\t     0.000\t        1\t[shufflenetv1/max_pooling2d_1/MaxPool]:1',
 'AVERAGE_POOL_2D\t    0.020\t    0.020\t  0.092%\t  1.073%\t     0.000\t        1\t[shufflenetv1/average_pooling2d_3/AvgPool]:2',
 'CONV_2D\t    0.421\t    0.404\t  1.856%\t  2.928%\t     0.000\t        1\t[shufflenetv1/re_lu_34/Relu;shufflenetv1/batch_normalization_50/FusedBatchNormV3;shufflenetv1/batch_normalization_50/FusedBatchNormV3/ReadVariableOp/resource;shufflenetv1/conv2d_34/BiasAdd;shufflenetv1/batch_normalization_60/FusedBatchNormV3;Conv2D]:3',
 'SHAPE\t    0.001\t  

In [110]:
# Section titles of the operator-profiling report and the key under which the
# index of each title line is stored. A line is matched against the markers in
# this order, and a later occurrence of a title overwrites an earlier one.
section_markers = (
    ("= Run Order =", "run_order"),
    ("= Top by Computation Time =", "top_by_computation_time"),
    ("= Summary by node type =", "summary_by_node_type"),
)

operator_lines_dict = {}
for i, line in enumerate(operator_profiling):
    matched_key = next((key for marker, key in section_markers if marker in line), None)
    if matched_key is not None:
        operator_lines_dict[matched_key] = i

operator_lines_dict

{'run_order': 0, 'top_by_computation_time': 221, 'summary_by_node_type': 234}

In [136]:
def get_operator_df(text):
    """Build a typed DataFrame from one per-operator table of the benchmark log.

    Args:
        text (list(list(str))): table rows already split into fields; the
            first row holds the bracketed column headers, e.g. ``"[node type]"``.

    Returns:
        pandas.DataFrame: one row per data row. The index starts at 1 because
        the header row is removed without re-indexing. The percentage columns
        (``"%"`` or ``"avg %"``, and ``"cdf%"`` or ``"cdf %"``) as well as
        ``"first"``, ``"avg ms"`` and ``"mem KB"`` hold floats;
        ``"times called"`` holds ints. Other columns are left unchanged.

    Raises:
        KeyError: if an expected column is missing.
        ValueError: if a cell cannot be converted to a number.
    """
    raw = pd.DataFrame(text)
    # Promote the first row to column names and keep the remaining rows.
    df = raw.iloc[1:].copy()
    df.columns = list(raw.iloc[0, :])
    df.columns = clean_column_names(df)

    # The tables spell the percentage headers in two ways. The spelling is
    # chosen by checking which column exists, not with a bare `except`, so a
    # malformed value raises its real ValueError and not a misleading KeyError.
    for spellings in (("%", "avg %"), ("cdf%", "cdf %")):
        column = next((name for name in spellings if name in df.columns), None)
        if column is None:
            raise KeyError(f"none of the columns {spellings} found in {list(df.columns)}")
        df[column] = df[column].apply(string_percent_to_float)

    for column in ("first", "avg ms", "mem KB"):
        df[column] = df[column].map(float)
    df["times called"] = df["times called"].map(int)

    return df

In [141]:
def get_node_df(text):
    """Build a typed DataFrame from the node-type summary table.

    Args:
        text (list(list(str))): table rows already split into fields; the
            first row holds the bracketed column headers.

    Returns:
        pandas.DataFrame: the data rows (the index starts at 1 because the
        header row is removed without re-indexing). ``"avg %"`` and
        ``"cdf %"`` are parsed as percentages into floats, ``"avg ms"`` and
        ``"mem KB"`` are floats, ``"times called"`` and ``"count"`` are ints.
    """
    table = pd.DataFrame(text)
    # Use the first row as header, then remove that row from the data.
    table = table.rename(columns=table.iloc[0, :]).drop(table.index[0])
    table.columns = clean_column_names(table)

    for percent_col in ("avg %", "cdf %"):
        table[percent_col] = table[percent_col].apply(string_percent_to_float)
    for float_col in ("avg ms", "mem KB"):
        table[float_col] = table[float_col].map(float)
    for int_col in ("times called", "count"):
        table[int_col] = table[int_col].map(int)

    return table

In [142]:
# Rows of the "Summary by node type" table, split into fields:
# start one line after the section title and leave out the last three lines
# of the report (presumably trailing footer lines, not table rows; verify
# against the benchmark log).
summary_by_node_type = remove_tabs(
    operator_profiling[operator_lines_dict["summary_by_node_type"] + 1:-3]
)
summary_by_node_type

[['[Node type]',
  '  [count]',
  '  [avg ms]',
  '    [avg %]',
  '    [cdf %]',
  '  [mem KB]',
  '[times called]'],
 ['CONV_2D',
  '       33',
  '    21.102',
  '    97.532%',
  '    97.532%',
  '     0.000',
  '       33'],
 ['DEPTHWISE_CONV_2D',
  '       16',
  '     0.130',
  '     0.601%',
  '    98.133%',
  '     0.000',
  '       16'],
 ['ADD',
  '       13',
  '     0.080',
  '     0.370%',
  '    98.502%',
  '     0.000',
  '       13'],
 ['TRANSPOSE',
  '       16',
  '     0.075',
  '     0.347%',
  '    98.849%',
  '     0.000',
  '       16'],
 ['MAX_POOL_2D',
  '        1',
  '     0.065',
  '     0.300%',
  '    99.150%',
  '     0.000',
  '        1'],
 ['RESHAPE',
  '       32',
  '     0.057',
  '     0.263%',
  '    99.413%',
  '     0.000',
  '       32'],
 ['AVERAGE_POOL_2D',
  '        3',
  '     0.045',
  '     0.208%',
  '    99.621%',
  '     0.000',
  '        3'],
 ['PACK',
  '       32',
  '     0.020',
  '     0.092%',
  '    99.713%',
  '     0.000',


In [143]:
# Convert the node-type summary rows into a typed DataFrame and display it.
summary_by_node_type_df = get_node_df(summary_by_node_type)
summary_by_node_type_df

Unnamed: 0,Node type,count,avg ms,avg %,cdf %,mem KB,times called
1,CONV_2D,33,21.102,97.532,97.532,0.0,33
2,DEPTHWISE_CONV_2D,16,0.13,0.601,98.133,0.0,16
3,ADD,13,0.08,0.37,98.502,0.0,13
4,TRANSPOSE,16,0.075,0.347,98.849,0.0,16
5,MAX_POOL_2D,1,0.065,0.3,99.15,0.0,1
6,RESHAPE,32,0.057,0.263,99.413,0.0,32
7,AVERAGE_POOL_2D,3,0.045,0.208,99.621,0.0,3
8,PACK,32,0.02,0.092,99.713,0.0,32
9,CONCATENATION,3,0.018,0.083,99.797,0.0,3
10,STRIDED_SLICE,32,0.017,0.079,99.875,0.0,32


In [None]:
# Rows of the "Top by Computation Time" table, split into fields:
# from the line after the section title up to, but excluding, the line directly
# before the "Summary by node type" title (NOTE(review): that excluded line
# appears not to be a table row; confirm against the benchmark log).
top_by_comp_time = remove_tabs(
    operator_profiling[
        operator_lines_dict["top_by_computation_time"] + 1
        : operator_lines_dict["summary_by_node_type"] - 1
    ]
)
#top_by_comp_time

In [115]:
# Convert the "top by computation time" rows into a typed DataFrame and display it.
df_operator_top_by_comp_time = get_operator_df(top_by_comp_time)
df_operator_top_by_comp_time

Unnamed: 0,node type,first,avg ms,%,cdf%,mem KB,times called,Name
1,CONV_2D,1.412,1.356,6.232,6.232,0.0,1,[shufflenetv1/re_lu_58/Relu;shufflenetv1/batch_normalization_86/FusedBatchNormV3;shufflenetv1/batch_normalization_55/FusedBatchNormV3/ReadVariableOp/resource;shufflenetv1/conv2d_58/BiasAdd;shufflenetv1/batch_normalization_96/FusedBatchNormV3;Conv2D]:163
2,CONV_2D,1.359,1.181,5.427,11.659,0.0,1,[shufflenetv1/re_lu_42/Relu;shufflenetv1/batch_normalization_62/FusedBatchNormV3;shufflenetv1/batch_normalization_62/FusedBatchNormV3/ReadVariableOp/resource;shufflenetv1/conv2d_42/BiasAdd;shufflenetv1/batch_normalization_84/FusedBatchNormV3;Conv2D]:57
3,CONV_2D,0.747,0.729,3.349,15.009,0.0,1,[shufflenetv1/re_lu_60/Relu;shufflenetv1/batch_normalization_89/FusedBatchNormV3;shufflenetv1/batch_normalization_55/FusedBatchNormV3/ReadVariableOp/resource;shufflenetv1/conv2d_60/BiasAdd;shufflenetv1/batch_normalization_96/FusedBatchNormV3;Conv2D]:177
4,CONV_2D,0.763,0.725,3.333,18.342,0.0,1,[shufflenetv1/re_lu_62/Relu;shufflenetv1/batch_normalization_92/FusedBatchNormV3;shufflenetv1/batch_normalization_55/FusedBatchNormV3/ReadVariableOp/resource;shufflenetv1/conv2d_62/BiasAdd;shufflenetv1/batch_normalization_96/FusedBatchNormV3;Conv2D]:190
5,CONV_2D,0.725,0.722,3.317,21.659,0.0,1,[shufflenetv1/re_lu_64/Relu;shufflenetv1/batch_normalization_95/FusedBatchNormV3;shufflenetv1/batch_normalization_55/FusedBatchNormV3/ReadVariableOp/resource;shufflenetv1/conv2d_64/BiasAdd;shufflenetv1/batch_normalization_96/FusedBatchNormV3;Conv2D]:203
6,CONV_2D,0.778,0.696,3.197,24.855,0.0,1,[shufflenetv1/batch_normalization_61/FusedBatchNormV3;shufflenetv1/batch_normalization_55/FusedBatchNormV3/ReadVariableOp/resource;shufflenetv1/conv2d_41/BiasAdd;shufflenetv1/batch_normalization_96/FusedBatchNormV3;Conv2D]:54
7,CONV_2D,0.77,0.691,3.176,28.031,0.0,1,[shufflenetv1/re_lu_54/Relu;shufflenetv1/batch_normalization_80/FusedBatchNormV3;shufflenetv1/batch_normalization_62/FusedBatchNormV3/ReadVariableOp/resource;shufflenetv1/conv2d_54/BiasAdd;shufflenetv1/batch_normalization_84/FusedBatchNormV3;Conv2D]:136
8,CONV_2D,0.751,0.691,3.174,31.205,0.0,1,[shufflenetv1/batch_normalization_55/FusedBatchNormV3;shufflenetv1/batch_normalization_55/FusedBatchNormV3/ReadVariableOp/resource;shufflenetv1/conv2d_37/BiasAdd;shufflenetv1/batch_normalization_96/FusedBatchNormV3;Conv2D]:28
9,CONV_2D,0.788,0.683,3.138,34.344,0.0,1,[shufflenetv1/batch_normalization_58/FusedBatchNormV3;shufflenetv1/batch_normalization_55/FusedBatchNormV3/ReadVariableOp/resource;shufflenetv1/conv2d_39/BiasAdd;shufflenetv1/batch_normalization_96/FusedBatchNormV3;Conv2D]:41
10,CONV_2D,0.795,0.683,3.138,37.482,0.0,1,[shufflenetv1/re_lu_56/Relu;shufflenetv1/batch_normalization_83/FusedBatchNormV3;shufflenetv1/batch_normalization_62/FusedBatchNormV3/ReadVariableOp/resource;shufflenetv1/conv2d_56/BiasAdd;shufflenetv1/batch_normalization_84/FusedBatchNormV3;Conv2D]:149


In [123]:
# Rows of the "Run Order" table, split into fields: every line between the
# "Run Order" title and the "Top by Computation Time" title (both excluded).
run_order = remove_tabs(
    operator_profiling[
        operator_lines_dict["run_order"] + 1
        : operator_lines_dict["top_by_computation_time"]
    ]
)
#run_order

In [121]:
# Convert the run-order rows into a typed DataFrame; only the first ten
# operators are displayed.
df_operator_run_order = get_operator_df(run_order)
#df_operator_run_order.style.set_properties(**{'text-align': 'left'})
df_operator_run_order.head(10)

Unnamed: 0,node type,first,avg ms,%,cdf%,mem KB,times called,Name
1,CONV_2D,0.144,0.148,0.68,0.68,0.0,1,[shufflenetv1/re_lu_33/Relu;shufflenetv1/batch_normalization_49/FusedBatchNormV3;shufflenetv1/batch_normalization_49/FusedBatchNormV3/ReadVariableOp/resource;shufflenetv1/conv2d_33/BiasAdd;shufflenetv1/conv2d_33/Conv2D]:0
2,MAX_POOL_2D,0.063,0.065,0.301,0.981,0.0,1,[shufflenetv1/max_pooling2d_1/MaxPool]:1
3,AVERAGE_POOL_2D,0.02,0.02,0.092,1.073,0.0,1,[shufflenetv1/average_pooling2d_3/AvgPool]:2
4,CONV_2D,0.421,0.404,1.856,2.928,0.0,1,[shufflenetv1/re_lu_34/Relu;shufflenetv1/batch_normalization_50/FusedBatchNormV3;shufflenetv1/batch_normalization_50/FusedBatchNormV3/ReadVariableOp/resource;shufflenetv1/conv2d_34/BiasAdd;shufflenetv1/batch_normalization_60/FusedBatchNormV3;Conv2D]:3
5,SHAPE,0.001,0.001,0.004,2.932,0.0,1,[shufflenetv1/reshape_32/Shape]:4
6,STRIDED_SLICE,0.001,0.002,0.008,2.94,0.0,1,[shufflenetv1/reshape_32/strided_slice]:5
7,PACK,0.001,0.001,0.006,2.947,0.0,1,[shufflenetv1/reshape_32/Reshape/shape]:6
8,RESHAPE,0.007,0.007,0.033,2.979,0.0,1,[shufflenetv1/reshape_32/Reshape]:7
9,TRANSPOSE,0.016,0.019,0.09,3.069,0.0,1,[shufflenetv1/permute_16/transpose]:8
10,SHAPE,0.001,0.001,0.003,3.072,0.0,1,[shufflenetv1/reshape_33/Shape]:9


In [122]:
# Check column dtypes and non-null counts after the numeric conversion.
df_operator_run_order.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 219 entries, 1 to 219
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   node type     219 non-null    object 
 1   first         219 non-null    float64
 2   avg ms        219 non-null    float64
 3   %             219 non-null    float64
 4   cdf%          219 non-null    float64
 5   mem KB        219 non-null    float64
 6   times called  219 non-null    int64  
 7   Name          219 non-null    object 
dtypes: float64(5), int64(1), object(2)
memory usage: 13.8+ KB


# Logging to wandb

In [147]:
# Generate a run id (named `run_id` so that the builtin `id` is not shadowed)
run_id = wandb.util.generate_id()

# The W&B project is the part of the model name before the first underscore,
# e.g. "shufflenetv1" for "shufflenetv1_1_96_c3_o3_000".
PROJECT = model_name.split("_")[0]


def log_dataframe_artifact(run, dataframe, artifact_name, table_key):
    """Log ``dataframe`` to ``run`` as a W&B table wrapped in an artifact.

    Args:
        run: the active W&B run (as returned by ``wandb.init``).
        dataframe (pandas.DataFrame): data to upload.
        artifact_name (str): artifact name; W&B only accepts alphanumeric
            characters, dashes, underscores and dots here.
        table_key (str): name under which the table is stored in the artifact.

    Returns:
        The ``wandb.Artifact`` that was logged.
    """
    table = wandb.Table(dataframe=dataframe)
    artifact = wandb.Artifact(name=artifact_name, type='dataframe')
    artifact.add(table, table_key)
    run.log_artifact(artifact)
    return artifact


run = wandb.init(
    # Set the project where this run will be logged
    project=PROJECT,
    name=model_name,
    id=run_id,
    resume="allow",
    sync_tensorboard=True,
)
try:
    # Specify the configuration variables
    config = wandb.config
    config.architecture = model_name

    # One artifact per profiling table
    log_dataframe_artifact(
        run,
        df_operator_top_by_comp_time,
        f'{model_name}_operator_top_by_time',
        "Profiling operators - top by time",
    )
    log_dataframe_artifact(
        run,
        summary_by_node_type_df,
        f'{model_name}_summary_by_node_type',
        "Node type summary",
    )
    log_dataframe_artifact(
        run,
        df_operator_run_order,
        f'{model_name}_operator_run_order',
        "Run order",
    )
    # TODO: optionally log Plotly figures as well, e.g. written to HTML and
    # added to a wandb.Table via wandb.Html.
finally:
    # Always close the run, even when an upload fails, so that no run is left
    # open in the kernel.
    wandb.finish()

ValueError: Artifact name may only contain alphanumeric characters, dashes, underscores, and dots. Invalid name: "shufflenetv1_1_96_c3_o3_000 - operator_top_by_time"