# 03a Latency Analysis - with emphasis on the difference between the models (and their respective optimization)

In [1]:
%run '00_Imports-and-settings.ipynb'

Numpy Version:		 1.19.0
Pandas Version:		 1.0.5
Matplotlib Version:	 3.2.2


In [2]:
# helper: derive the short model name used in figure file names
def get_model_name(model_string):
    """Return the short model name for a raw model identifier string.

    The identifier is matched by substring: anything containing 'ResNet'
    maps to 'ResNet20_CIFAR-10', anything containing 'LeNet' maps to
    'LeNet-MNIST' ('ResNet' is checked first). If neither matches, a notice
    is printed and 'unknown' is returned.
    """
    known_models = (
        ('ResNet', 'ResNet20_CIFAR-10'),
        ('LeNet', 'LeNet-MNIST'),
    )
    for marker, short_name in known_models:
        if marker in model_string:
            return short_name

    print('unknown model name')
    return 'unknown'

### Load the data

In [3]:
# Benchmark results are stored as one pickle per (board, model); each frame is
# sorted by 'model_type' right after loading.
# NOTE: pd.read_pickle executes pickle data - only load files you produced yourself.

# --- LeNet on MNIST -----------------------------------------------------------
model = 'LeNet-MNIST'
lenet_df = {
    "L4": pd.read_pickle(
        'results/L4/NUCLEO_L496ZG_LeNet-MNIST_benchmarking+verification_results_2020-07-24.pkl'
    ).sort_values(by=['model_type']),
    "F4": pd.read_pickle(
        'results/F4/DISCO_F469NI_LeNet-MNIST_benchmarking+verification_results_2020-07-18.pkl'
    ).sort_values(by=['model_type']),
    "F7": pd.read_pickle(
        'results/F7/NUCLEO_F767ZI_LeNet-MNIST_benchmarking+verification_results_2020-07-22.pkl'
    ).sort_values(by=['model_type']),
}

# --- ResNet20 on CIFAR-10 -----------------------------------------------------
model = '01d_ResNet20_CIFAR-10'
resnet_df = {
    "L4": pd.read_pickle(
        'results/L4/NUCLEO_L496ZG_01d_ResNet20_CIFAR-10_benchmarking_results_2020-07-20.pkl'
    ).sort_values(by=['model_type']),
    "F4": pd.read_pickle(
        'results/F4/DISCO_F469NI_01d_ResNet20_CIFAR-10_benchmarking_results_2020-07-19.pkl'
    ).sort_values(by=['model_type']),
    "F7": pd.read_pickle(
        'results/F7/NUCLEO_F767ZI_01d_ResNet20_CIFAR-10_benchmarking_results_2020-07-22.pkl'
    ).sort_values(by=['model_type']),
}

# Per-board aliases; as before, they end up pointing at the ResNet frames.
l4_df = resnet_df["L4"]
f4_df = resnet_df["F4"]
f7_df = resnet_df["F7"]

In [4]:
# One frame holding every run of both models on all boards.
# Frames are interleaved per board (ResNet first, then LeNet) to keep the row
# order of the former append loop. A single pd.concat replaces the repeated
# DataFrame.append calls, which re-copied the growing frame on every iteration
# and no longer exist in pandas >= 2.0. Index labels are kept as they are
# (not reset), so they repeat across boards.
super_df = pd.concat(
    [frame for mcu in resnet_df for frame in (resnet_df[mcu], lenet_df[mcu])],
    sort=False,
)

In [5]:
# Rows without a 'tflu_mcu_benchmark_mean' value take the value of
# 'tflu_mcu_benchmark_mean_50' instead, so that a single latency column can be
# read from super_df further down.
super_df['tflu_mcu_benchmark_mean'] = super_df['tflu_mcu_benchmark_mean'].fillna(super_df['tflu_mcu_benchmark_mean_50'])

Available columns

In [6]:
# List the columns of one result frame (ResNet20 on the F4 board) as a reference
# for the filter keys used in the cells below.
resnet_df['F4'].columns

Index(['time', 'MCU', 'model', 'mbed-dir', 'cmsis-nn', 'arena_size',
       'compiler_optimization', 'FPU_status', 'MACs', 'pruned', 'weights',
       'activations', 'model_type', 'model_size', 'model_size_reduction',
       'model_size_gzip', 'model_size_reduction_gzip', 'binary_size',
       'input_details_dtype', 'input_details_shape', 'output_details_dtype',
       'output_details_shape', 'tfl_interpreter_accuracy',
       'tfl_interpreter_loss_crossentropy', 'tfl_interpreter_loss_meansquared',
       'inferences_per_cycle', 'tflu_mcu_benchmark_single',
       'tflu_mcu_benchmark_mean', 'tflu_mcu_benchmark_std',
       'tflu_mcu_accuracy', 'tflu_mcu_loss_crossentropy',
       'tflu_mcu_loss_meansquared', 'tflu_mcu_accuracy_50',
       'tflu_mcu_benchmark_mean_50', 'tflu_mcu_benchmark_std_50'],
      dtype='object')

## Latency

### Basic model: comparing only the quantization

In [7]:

# Per board and model: bar plot of inference latency for the float32 and int8
# variants, with compiler flags, cmsis-nn and FPU setting held fixed.
filter_dict = {
    'compiler_optimization': '-Ofast',
    'cmsis-nn': 'none',
    'FPU_status': 1
}

for dfs in [lenet_df, resnet_df]:
    for mcu in dfs:
        df = dfs[mcu]

        model_name = get_model_name(df['model'].unique()[0])

        # only runs whose weights and activations use the same dtype
        row_mask = (df['weights'] == df['activations'])

        # narrow down to the fixed configuration; the values also form the file name suffix
        filter_descr = ''
        for key in filter_dict:
            row_mask = row_mask & (df[key] == filter_dict[key])
            filter_descr += (str(filter_dict[key]) + '_')

        # sorted so that the bar order matches the tick labels set below;
        # .copy() so the scaling below can never write through to the loaded frames
        df_filtered = df.loc[row_mask].sort_values(by=['model_type']).copy()

        # Frames that carry the 50-sample mean use it; the others fall back to the
        # plain mean. (Explicit check instead of try/except KeyError, which also
        # swallowed KeyErrors raised by the plotting calls.)
        if 'tflu_mcu_benchmark_mean_50' in df_filtered.columns:
            latency_col = 'tflu_mcu_benchmark_mean_50'
        else:
            latency_col = 'tflu_mcu_benchmark_mean'

        # presumably stored in microseconds; /1000 gives the [ms] shown on the axis - TODO confirm
        df_filtered[latency_col] = df_filtered[latency_col] / 1000

        fig = sns.catplot(y='model_type', x=latency_col, kind='bar', data=df_filtered)
        display(df_filtered[['model_type', latency_col]].drop_duplicates())

        plt.xlabel("Inference Latency [ms]")
        plt.ylabel("Model Type")
        fig.set_yticklabels(['\\texttt{float32}', '\\texttt{int8}'])

        filename = f'figures/latency/perMCU/{mcu}_{model_name}_basic__{filter_descr}'
        print(filename)

        plt.tight_layout()
        # Export BEFORE plt.show(): backends that close the figure on show
        # (e.g. inline) would otherwise write an empty PDF / TikZ file.
        plt.savefig(filename + '.pdf')
        tikzplotlib.save(filename + '.tex', axis_width=AXIS_WIDTH, axis_height=AXIS_HEIGHT)
        plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean
0,W-float32_A-float32,112.1962348
2,W-int8_A-int8,120.6695127


figures/latency/perMCU/L4_LeNet-MNIST_basic__-Ofast_none_1_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean
12,W-float32_A-float32,58.5430699
14,W-int8_A-int8,65.5978391


figures/latency/perMCU/F4_LeNet-MNIST_basic__-Ofast_none_1_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean
0,W-float32_A-float32,19.5964754
2,W-int8_A-int8,17.8879866


figures/latency/perMCU/F7_LeNet-MNIST_basic__-Ofast_none_1_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean_50
4,W-float32_A-float32,
0,W-int8_A-int8,12088.4626


figures/latency/perMCU/L4_ResNet20_CIFAR-10_basic__-Ofast_none_1_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean_50
4,W-float32_A-float32,6151.92884
0,W-int8_A-int8,6506.26076


figures/latency/perMCU/F4_ResNet20_CIFAR-10_basic__-Ofast_none_1_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean_50
2,W-float32_A-float32,1613.53754
0,W-int8_A-int8,1119.74508


figures/latency/perMCU/F7_ResNet20_CIFAR-10_basic__-Ofast_none_1_


### hue = FPU_status

In [9]:
# Per board and model: latency of the float32 and int8 variants, with one bar
# per FPU setting (hue) and compiler flags / cmsis-nn held fixed.
hue = 'FPU_status'

filter_dict = {
    'compiler_optimization': '-Ofast',
    'cmsis-nn': 'none'
}

for dfs in [lenet_df, resnet_df]:
    for mcu in dfs:
        df = dfs[mcu]

        model_name = get_model_name(df['model'].unique()[0])

        # only runs whose weights and activations use the same dtype
        row_mask = (df['weights'] == df['activations'])

        # narrow down to the fixed configuration; the values also form the file name suffix
        filter_descr = ''
        for key in filter_dict:
            row_mask = row_mask & (df[key] == filter_dict[key])
            # str() as in the sibling cells, so non-string filter values work too
            filter_descr += (str(filter_dict[key]) + '_')

        # sorted so that the bar order matches the tick labels set below;
        # .copy() so the scaling below can never write through to the loaded frames
        df_filtered = df.loc[row_mask].sort_values(by=['model_type', 'cmsis-nn']).copy()

        # Frames that carry the 50-sample mean use it; the others fall back to the
        # plain mean. (Explicit check instead of try/except KeyError, which also
        # swallowed KeyErrors raised by the plotting calls.)
        if 'tflu_mcu_benchmark_mean_50' in df_filtered.columns:
            latency_col = 'tflu_mcu_benchmark_mean_50'
        else:
            latency_col = 'tflu_mcu_benchmark_mean'

        # presumably stored in microseconds; /1000 gives the [ms] shown on the axis - TODO confirm
        df_filtered[latency_col] = df_filtered[latency_col] / 1000

        fig = sns.catplot(y='model_type', x=latency_col, kind='bar', hue=hue, data=df_filtered)
        display(df_filtered[['model_type', latency_col]].drop_duplicates())

        plt.xlabel("Inference Latency [ms]")
        plt.ylabel("Model Type")
        fig.set_yticklabels(['\\texttt{float32}', '\\texttt{int8}'])

        filename = f'figures/latency/perMCU/{mcu}_{model_name}_hue-{hue}__{filter_descr}'
        print(filename)

        plt.tight_layout()
        # Export BEFORE plt.show(): backends that close the figure on show
        # (e.g. inline) would otherwise write an empty PDF / TikZ file.
        plt.savefig(filename + '.pdf')
        tikzplotlib.save(filename + '.tex', axis_width=AXIS_WIDTH, axis_height=AXIS_HEIGHT)
        plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean
1,W-float32_A-float32,113.1516721
5,W-float32_A-float32,404.4892701
3,W-int8_A-int8,36.391342
7,W-int8_A-int8,36.9907969


figures/latency/perMCU/L4_LeNet-MNIST_hue-FPU_status__-Ofast_cmsis-nn_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean
9,W-float32_A-float32,188.8158139
13,W-float32_A-float32,59.5089206
11,W-int8_A-int8,16.4595706
15,W-int8_A-int8,16.1060121


figures/latency/perMCU/F4_LeNet-MNIST_hue-FPU_status__-Ofast_cmsis-nn_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean
1,W-float32_A-float32,19.9671834
5,W-float32_A-float32,105.1181112
3,W-int8_A-int8,8.0455337
7,W-int8_A-int8,8.3495034


figures/latency/perMCU/F7_LeNet-MNIST_hue-FPU_status__-Ofast_cmsis-nn_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean_50
11,W-float32_A-float32,
7,W-int8_A-int8,2223.33386
1,W-int8_A-int8,2217.00896


figures/latency/perMCU/L4_ResNet20_CIFAR-10_hue-FPU_status__-Ofast_cmsis-nn_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean_50
11,W-float32_A-float32,21002.11226
5,W-float32_A-float32,6151.93348
7,W-int8_A-int8,985.54588
1,W-int8_A-int8,984.3176


figures/latency/perMCU/F4_ResNet20_CIFAR-10_hue-FPU_status__-Ofast_cmsis-nn_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean_50
3,W-float32_A-float32,1613.39756
7,W-float32_A-float32,11046.48006
1,W-int8_A-int8,449.30214
5,W-int8_A-int8,451.766


figures/latency/perMCU/F7_ResNet20_CIFAR-10_hue-FPU_status__-Ofast_cmsis-nn_


### hue = cmsis-nn

In [11]:
# Per board and model: latency of the float32 and int8 variants, with one bar
# per kernel implementation (hue = cmsis-nn on/off), FPU disabled, -Ofast.
hue = 'cmsis-nn'

filter_dict = {
    'compiler_optimization': '-Ofast',
    'FPU_status': 0,
}

for dfs in [lenet_df, resnet_df]:
    for mcu in dfs:
        df = dfs[mcu]

        model_name = get_model_name(df['model'].unique()[0])

        # only runs whose weights and activations use the same dtype
        row_mask = (df['weights'] == df['activations'])

        # narrow down to the fixed configuration; the values also form the file name suffix
        filter_descr = ''
        for key in filter_dict:
            row_mask = row_mask & (df[key] == filter_dict[key])
            filter_descr += (str(filter_dict[key]) + '_')

        # sorted so that the bar order matches the tick labels set below;
        # .copy() so the scaling below can never write through to the loaded frames
        df_filtered = df.loc[row_mask].sort_values(by=['model_type']).copy()

        # Frames that carry the 50-sample mean use it; the others fall back to the
        # plain mean. (Explicit check instead of try/except KeyError, which also
        # swallowed KeyErrors raised by the plotting calls.)
        if 'tflu_mcu_benchmark_mean_50' in df_filtered.columns:
            latency_col = 'tflu_mcu_benchmark_mean_50'
        else:
            latency_col = 'tflu_mcu_benchmark_mean'

        # presumably stored in microseconds; /1000 gives the [ms] shown on the axis - TODO confirm
        df_filtered[latency_col] = df_filtered[latency_col] / 1000

        fig = sns.catplot(y='model_type', x=latency_col, kind='bar', hue=hue,
                          data=df_filtered, height=4, aspect=16/9)
        display(df_filtered[['model_type', latency_col]].drop_duplicates())

        fig.set_yticklabels(['\\texttt{float32}', '\\texttt{int8}'])

        plt.xlabel("Inference Latency [ms]")
        plt.ylabel("Model Type")

        filename = f'figures/latency/perMCU/{mcu}_{model_name}_hue-{hue}__{filter_descr}'
        print(filename)

        plt.tight_layout()
        # Export BEFORE plt.show(): backends that close the figure on show
        # (e.g. inline) would otherwise write an empty PDF / TikZ file.
        plt.savefig(filename + '.pdf')
        tikzplotlib.save(filename + '.tex', axis_width=AXIS_WIDTH, axis_height=AXIS_HEIGHT)
        plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean
4,W-float32_A-float32,405.645166
5,W-float32_A-float32,404.4892701
6,W-int8_A-int8,120.2069748
7,W-int8_A-int8,36.9907969


figures/latency/perMCU/L4_LeNet-MNIST_hue-cmsis-nn__-Ofast_0_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean
8,W-float32_A-float32,190.1940494
9,W-float32_A-float32,188.8158139
10,W-int8_A-int8,65.3959346
11,W-int8_A-int8,16.4595706


figures/latency/perMCU/F4_LeNet-MNIST_hue-cmsis-nn__-Ofast_0_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean
4,W-float32_A-float32,104.307814
5,W-float32_A-float32,105.1181112
6,W-int8_A-int8,18.2157146
7,W-int8_A-int8,8.3495034


figures/latency/perMCU/F7_LeNet-MNIST_hue-cmsis-nn__-Ofast_0_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean_50
11,W-float32_A-float32,
6,W-int8_A-int8,12090.50392
7,W-int8_A-int8,2223.33386


figures/latency/perMCU/L4_ResNet20_CIFAR-10_hue-cmsis-nn__-Ofast_0_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean_50
11,W-float32_A-float32,21002.11226
10,W-float32_A-float32,21002.18334
6,W-int8_A-int8,6507.4248
7,W-int8_A-int8,985.54588


figures/latency/perMCU/F4_ResNet20_CIFAR-10_hue-cmsis-nn__-Ofast_0_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean_50
6,W-float32_A-float32,11035.57802
7,W-float32_A-float32,11046.48006
4,W-int8_A-int8,1121.68646
5,W-int8_A-int8,451.766


figures/latency/perMCU/F7_ResNet20_CIFAR-10_hue-cmsis-nn__-Ofast_0_


In [12]:
# NOTE(review): debugging leftover. `df_filtered` is not defined in this cell; it is
# whatever the last iteration of the preceding plotting loop left behind, so the
# output depends on which cell was executed before this one.
display(df_filtered[['MCU','tflu_mcu_benchmark_mean']].drop_duplicates())

Unnamed: 0,MCU,tflu_mcu_benchmark_mean
6,NUCLEO_F767ZI,


### hue = compiler_optimization

In [14]:
# Per board and model: latency of the float32 and int8 variants, with one bar
# per compiler optimisation level (hue), cmsis-nn kernels and FPU enabled.
hue = 'compiler_optimization'

filter_dict = {
    'cmsis-nn': 'cmsis-nn',
    'FPU_status': 1,
}

for dfs in [lenet_df, resnet_df]:
    for mcu in dfs:
        df = dfs[mcu]

        model_name = get_model_name(df['model'].unique()[0])

        # only runs whose weights and activations use the same dtype
        row_mask = (df['weights'] == df['activations'])

        # narrow down to the fixed configuration; the values also form the file name suffix
        filter_descr = ''
        for key in filter_dict:
            row_mask = row_mask & (df[key] == filter_dict[key])
            filter_descr += (str(filter_dict[key]) + '_')

        df_filtered = df.loc[row_mask]

        # Diagnostic: show the raw ResNet rows of the L4 board, where some latency
        # values are missing. The former `fillna(0)` here discarded its result and
        # therefore never changed anything; it was removed, so missing values still
        # reach the plot and the table as NaN.
        if ('ResNet' in model_name) and (mcu == 'L4'):
            display(df_filtered)

        # One multi-key sort (optimisation level first, model type within it) instead
        # of two consecutive single-key sorts, which only kept the model_type order
        # because the default sort happened to behave stably on these small frames.
        # .copy() so the scaling below can never write through to the loaded frames.
        df_filtered = df_filtered.sort_values(by=['compiler_optimization', 'model_type']).copy()

        # Frames that carry the 50-sample mean use it; the others fall back to the
        # plain mean. (Explicit check instead of try/except KeyError, which also
        # swallowed KeyErrors raised by the plotting calls.)
        if 'tflu_mcu_benchmark_mean_50' in df_filtered.columns:
            latency_col = 'tflu_mcu_benchmark_mean_50'
        else:
            latency_col = 'tflu_mcu_benchmark_mean'

        # presumably stored in microseconds; /1000 gives the [ms] shown on the axis - TODO confirm
        df_filtered[latency_col] = df_filtered[latency_col] / 1000

        fig = sns.catplot(y='model_type', x=latency_col, kind='bar', hue=hue,
                          data=df_filtered, height=4, aspect=16/9)
        display(df_filtered[['model_type', latency_col]].drop_duplicates())

        plt.xlabel("Inference Latency [ms]")
        plt.ylabel("Model Type")
        fig.set_yticklabels(['\\texttt{float32}', '\\texttt{int8}'])

        filename = f'figures/latency/perMCU/{mcu}_{model_name}_hue-{hue}__{filter_descr}'
        print(filename)

        plt.tight_layout()
        # Export BEFORE plt.show(): backends that close the figure on show
        # (e.g. inline) would otherwise write an empty PDF / TikZ file.
        plt.savefig(filename + '.pdf')
        tikzplotlib.save(filename + '.tex', axis_width=AXIS_WIDTH, axis_height=AXIS_HEIGHT)
        plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean
1,W-float32_A-float32,113.1516721
3,W-int8_A-int8,36.391342
9,W-float32_A-float32,173.6726751
11,W-int8_A-int8,41.8679232


figures/latency/perMCU/L4_LeNet-MNIST_hue-compiler_optimization__cmsis-nn_1_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean
13,W-float32_A-float32,59.5089206
15,W-int8_A-int8,16.1060121
5,W-float32_A-float32,93.1434285
7,W-int8_A-int8,19.4709134


figures/latency/perMCU/F4_LeNet-MNIST_hue-compiler_optimization__cmsis-nn_1_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean
1,W-float32_A-float32,19.9671834
3,W-int8_A-int8,8.0455337
9,W-float32_A-float32,41.4395183
11,W-int8_A-int8,9.4108728


figures/latency/perMCU/F7_LeNet-MNIST_hue-compiler_optimization__cmsis-nn_1_


Unnamed: 0,time,MCU,model,mbed-dir,cmsis-nn,arena_size,compiler_optimization,FPU_status,MACs,pruned,...,tflu_mcu_benchmark_single,tflu_mcu_benchmark_mean,tflu_mcu_benchmark_std,tflu_mcu_accuracy,tflu_mcu_loss_crossentropy,tflu_mcu_loss_meansquared,tflu_mcu_accuracy_50,tflu_mcu_benchmark_mean_50,tflu_mcu_benchmark_std_50,error
17,2020-07-20 11:14:20.119848,NUCLEO_L496ZG,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,./TFLu_benchmark-model_mbed_cmsis-nn,cmsis-nn,250,-Os,1,1625631,0,...,,,,,,,,,,error during compilation
5,2020-07-20 08:31:11.522870,NUCLEO_L496ZG,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,./TFLu_benchmark-model_mbed_cmsis-nn,cmsis-nn,250,-Ofast,1,1625631,0,...,,,,,,,,,,error during compilation
13,2020-07-20 10:21:44.225243,NUCLEO_L496ZG,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,./TFLu_benchmark-model_mbed_cmsis-nn,cmsis-nn,250,-Os,1,1625631,0,...,2839654.8,,,,,,0.94,2839520.76,212.32602855,
1,2020-07-20 08:00:33.399696,NUCLEO_L496ZG,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,./TFLu_benchmark-model_mbed_cmsis-nn,cmsis-nn,250,-Ofast,1,1625631,0,...,2217023.8,,,,,,0.94,2217008.96,186.1021182,


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean_50
5,W-float32_A-float32,
1,W-int8_A-int8,2217.00896
13,W-int8_A-int8,2839.52076


figures/latency/perMCU/L4_ResNet20_CIFAR-10_hue-compiler_optimization__cmsis-nn_1_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean_50
5,W-float32_A-float32,6151.93348
1,W-int8_A-int8,984.3176
17,W-float32_A-float32,14186.86932
13,W-int8_A-int8,1391.3408


figures/latency/perMCU/F4_ResNet20_CIFAR-10_hue-compiler_optimization__cmsis-nn_1_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean_50
3,W-float32_A-float32,1613.39756
1,W-int8_A-int8,449.30214
11,W-float32_A-float32,6053.5376
9,W-int8_A-int8,629.71014


figures/latency/perMCU/F7_ResNet20_CIFAR-10_hue-compiler_optimization__cmsis-nn_1_


In [15]:
# Per board and model, int8 variant only: latency with cmsis-nn kernels enabled
# vs. disabled (y axis), one bar per compiler optimisation level (hue), FPU enabled.
hue = 'compiler_optimization'

filter_dict = {
    'FPU_status': 1,
    'model_type': 'W-int8_A-int8'
}

for dfs in [lenet_df, resnet_df]:
    for mcu in dfs:
        df = dfs[mcu]

        model_name = get_model_name(df['model'].unique()[0])

        # only runs whose weights and activations use the same dtype
        row_mask = (df['weights'] == df['activations'])

        # narrow down to the fixed configuration; the values also form the file name suffix
        filter_descr = ''
        for key in filter_dict:
            row_mask = row_mask & (df[key] == filter_dict[key])
            filter_descr += (str(filter_dict[key]) + '_')

        # One multi-key sort instead of two consecutive sorts that relied on the
        # default sort behaving stably. Within each optimisation level 'cmsis-nn'
        # sorts before 'none', which is the order the tick labels below assume.
        # .copy() so the scaling below can never write through to the loaded frames.
        df_filtered = df.loc[row_mask].sort_values(
            by=['compiler_optimization', 'model_type', 'cmsis-nn']
        ).copy()

        # Frames with the 50-sample mean are shown in seconds, the others in
        # milliseconds (values presumably stored in microseconds - TODO confirm).
        # Explicit check instead of try/except KeyError, which also swallowed
        # KeyErrors raised by the plotting calls.
        if 'tflu_mcu_benchmark_mean_50' in df_filtered.columns:
            latency_col, divisor, unit = 'tflu_mcu_benchmark_mean_50', 1e6, 's'
        else:
            latency_col, divisor, unit = 'tflu_mcu_benchmark_mean', 1000, 'ms'

        df_filtered[latency_col] = df_filtered[latency_col] / divisor

        fig = sns.catplot(y='cmsis-nn', x=latency_col, kind='bar', hue=hue,
                          data=df_filtered, height=4, aspect=16/9)
        display(df_filtered[['model_type', latency_col]].drop_duplicates())

        plt.xlabel(f"Inference Latency [{unit}]")
        plt.ylabel("\\texttt{cmsis-nn}")
        fig.set_yticklabels(['enabled', 'disabled'])

        filename = f'figures/latency/perMCU/{mcu}_{model_name}_y-model_hue-{hue}__{filter_descr}'
        print(filename)

        plt.tight_layout()
        # Export BEFORE plt.show(): backends that close the figure on show
        # (e.g. inline) would otherwise write an empty PDF / TikZ file.
        plt.savefig(filename + '.pdf')
        tikzplotlib.save(filename + '.tex', axis_width=AXIS_WIDTH, axis_height=AXIS_HEIGHT)
        plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean
3,W-int8_A-int8,36.391342
2,W-int8_A-int8,120.6695127
11,W-int8_A-int8,41.8679232
10,W-int8_A-int8,192.8983619


figures/latency/perMCU/L4_LeNet-MNIST_y-model_hue-compiler_optimization__1_W-int8_A-int8_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean
15,W-int8_A-int8,16.1060121
14,W-int8_A-int8,65.5978391
7,W-int8_A-int8,19.4709134
6,W-int8_A-int8,107.4030283


figures/latency/perMCU/F4_LeNet-MNIST_y-model_hue-compiler_optimization__1_W-int8_A-int8_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean
3,W-int8_A-int8,8.0455337
2,W-int8_A-int8,17.8879866
11,W-int8_A-int8,9.4108728
10,W-int8_A-int8,45.220109


figures/latency/perMCU/F7_LeNet-MNIST_y-model_hue-compiler_optimization__1_W-int8_A-int8_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean_50
1,W-int8_A-int8,2.21700896
0,W-int8_A-int8,12.0884626
13,W-int8_A-int8,2.83952076
12,W-int8_A-int8,26.58078734


figures/latency/perMCU/L4_ResNet20_CIFAR-10_y-model_hue-compiler_optimization__1_W-int8_A-int8_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean_50
1,W-int8_A-int8,0.9843176
0,W-int8_A-int8,6.50626076
13,W-int8_A-int8,1.3913408
12,W-int8_A-int8,14.99310336


figures/latency/perMCU/F4_ResNet20_CIFAR-10_y-model_hue-compiler_optimization__1_W-int8_A-int8_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,tflu_mcu_benchmark_mean_50
1,W-int8_A-int8,0.44930214
0,W-int8_A-int8,1.11974508
9,W-int8_A-int8,0.62971014
8,W-int8_A-int8,6.22556866


figures/latency/perMCU/F7_ResNet20_CIFAR-10_y-model_hue-compiler_optimization__1_W-int8_A-int8_


In [None]:
# NOTE(review): displays the complete combined frame; consider super_df.head()
# to keep the notebook output small.
super_df

In [16]:
# All boards in one figure: latency of the int8 ResNet20 per MCU (y axis), one bar
# per compiler optimisation level (hue), one facet row per cmsis-nn setting.
hue = 'compiler_optimization'

# 'cmsis-nn' is deliberately not fixed here - it is the facet row of the plot
filter_dict = {
    'FPU_status': 1,
    'model_type': 'W-int8_A-int8',
    'model': '01d_ResNet20_CIFAR-10_none_tflite-builtins-INT8_none_dataset'
}

df = super_df

# only runs whose weights and activations use the same dtype
row_mask = (df['weights'] == df['activations'])

# narrow down to the fixed configuration; the values also form the file name suffix
filter_descr = ''
for key in filter_dict:
    row_mask = row_mask & (df[key] == filter_dict[key])
    filter_descr += (str(filter_dict[key]) + '_')

# 'model_type' is constant after filtering, so only the optimisation level needs
# sorting. A stable sort keeps the board order of super_df within each level
# instead of leaving it to the default (unstable) algorithm.
# .copy() so the scaling below can never write through to super_df.
df_filtered = df.loc[row_mask].sort_values(by=['compiler_optimization'], kind='mergesort').copy()

# presumably stored in microseconds; /1000 gives the [ms] shown on the axis - TODO confirm
df_filtered['tflu_mcu_benchmark_mean'] = df_filtered['tflu_mcu_benchmark_mean'] / 1000
fig = sns.catplot(y='MCU', x='tflu_mcu_benchmark_mean', kind='bar', hue=hue, col='model',
                  row='cmsis-nn', data=df_filtered, height=4, aspect=16/9)
display(df_filtered[['MCU', 'tflu_mcu_benchmark_mean']].drop_duplicates())

# Figure-level title and labels: plt.title / plt.xlabel / plt.ylabel only act on the
# last facet of the grid. The y axis shows the boards, so it is labelled "MCU"
# (it used to read "Model Type").
fig.fig.suptitle(json.dumps(filter_dict, indent=4)[1:-1])
fig.set_axis_labels("Inference latency [ms]", "MCU")

filename = f'figures/latency/perMCU/compiler_hue-{hue}__{filter_descr}'
print(filename)

# leave room at the top for the multi-line figure title
plt.tight_layout(rect=[0, 0, 1, 0.88])
# Export BEFORE plt.show(): backends that close the figure on show
# (e.g. inline) would otherwise write an empty PDF / TikZ file.
plt.savefig(filename + '.pdf')
tikzplotlib.save(filename + '.tex', axis_width=AXIS_WIDTH, axis_height=AXIS_HEIGHT)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,MCU,tflu_mcu_benchmark_mean
1,NUCLEO_L496ZG,2217.00896
0,NUCLEO_L496ZG,12088.4626
1,DISCO_F469NI,984.3176
0,DISCO_F469NI,6506.26076
0,NUCLEO_F767ZI,1119.74508
1,NUCLEO_F767ZI,449.30214
13,NUCLEO_L496ZG,2839.52076
12,NUCLEO_L496ZG,26580.78734
13,DISCO_F469NI,1391.3408
12,DISCO_F469NI,14993.10336


figures/latency/perMCU/compiler_hue-compiler_optimization__1_W-int8_A-int8_01d_ResNet20_CIFAR-10_none_tflite-builtins-INT8_none_dataset_


### Relative speed-up of each configuration over the slowest one (per model and MCU)

int8 cmsis-nn vs all the others

In [17]:
# For every board and model: find the slowest configuration and express every
# other configuration's latency as a speed-up factor relative to it.
for dfs in [lenet_df, resnet_df]:
    for mcu in dfs:
        df = dfs[mcu]

        # only runs whose weights and activations use the same dtype
        row_mask = (df['weights'] == df['activations'])

        # One multi-key sort (kernel implementation, FPU, optimisation level, model
        # type) instead of four consecutive single-key sorts that relied on the
        # default sort behaving stably. .copy() because a column is added below.
        df_filtered = df.loc[row_mask].sort_values(
            by=['cmsis-nn', 'FPU_status', 'compiler_optimization', 'model_type']
        ).copy()

        # Pick ONE latency column and use it for both the min/max lookup and the
        # speed-up: the 50-sample mean where it exists and holds values, otherwise
        # the plain mean. Previously the index could come from one column and the
        # ratio from the other, and the fallback depended on idxmax() returning NaN
        # for an all-NaN column, which newer pandas versions no longer do.
        latency_col = 'tflu_mcu_benchmark_mean_50'
        if latency_col not in df_filtered.columns or df_filtered[latency_col].isna().all():
            latency_col = 'tflu_mcu_benchmark_mean'

        measured = df_filtered[latency_col].dropna()
        if measured.empty:
            # nothing was measured for this board/model - no speed-up to compute
            print(f'no latency measurements for {mcu}')
            print('\n')
            continue

        min_index = measured.idxmin()
        max_index = measured.idxmax()

        print('min_index', min_index)
        print('max_index', max_index)

        # the slowest configuration, i.e. the reference for the speed-up factors
        display( df_filtered.loc[max_index][['MCU','model','model_type','FPU_status', 'compiler_optimization', 'cmsis-nn']] )

        # factor > 1 means faster than the slowest configuration
        df_filtered['relative_speedup'] = df_filtered.loc[max_index, latency_col] / df_filtered[latency_col]
        display( df_filtered[['model','model_type','FPU_status', 'compiler_optimization', 'cmsis-nn', latency_col,'relative_speedup']] )

        print('\n')



min_index 3
max_index 12


MCU                                                   NUCLEO_L496ZG
model                    LeNet-MNIST_none_tflite-builtins_none_none
model_type                                      W-float32_A-float32
FPU_status                                                        0
compiler_optimization                                           -Os
cmsis-nn                                                       none
Name: 12, dtype: object

Unnamed: 0,model,model_type,FPU_status,compiler_optimization,cmsis-nn,tflu_mcu_benchmark_mean,relative_speedup
5,LeNet-MNIST_none_tflite-builtins_none_none,W-float32_A-float32,0,-Ofast,cmsis-nn,404489.2701,1.16329042
7,LeNet-MNIST_none_tflite-builtins-INT8_none_dat...,W-int8_A-int8,0,-Ofast,cmsis-nn,36990.7969,12.72042058
13,LeNet-MNIST_none_tflite-builtins_none_none,W-float32_A-float32,0,-Os,cmsis-nn,465332.5883,1.01118749
15,LeNet-MNIST_none_tflite-builtins-INT8_none_dat...,W-int8_A-int8,0,-Os,cmsis-nn,42565.2597,11.05451951
1,LeNet-MNIST_none_tflite-builtins_none_none,W-float32_A-float32,1,-Ofast,cmsis-nn,113151.6721,4.15847583
3,LeNet-MNIST_none_tflite-builtins-INT8_none_dat...,W-int8_A-int8,1,-Ofast,cmsis-nn,36391.342,12.92995719
9,LeNet-MNIST_none_tflite-builtins_none_none,W-float32_A-float32,1,-Os,cmsis-nn,173672.6751,2.70934097
11,LeNet-MNIST_none_tflite-builtins-INT8_none_dat...,W-int8_A-int8,1,-Os,cmsis-nn,41867.9232,11.23863947
4,LeNet-MNIST_none_tflite-builtins_none_none,W-float32_A-float32,0,-Ofast,none,405645.166,1.1599756
6,LeNet-MNIST_none_tflite-builtins-INT8_none_dat...,W-int8_A-int8,0,-Ofast,none,120206.9748,3.91440259




min_index 15
max_index 0


MCU                                                    DISCO_F469NI
model                    LeNet-MNIST_none_tflite-builtins_none_none
model_type                                      W-float32_A-float32
FPU_status                                                        0
compiler_optimization                                           -Os
cmsis-nn                                                       none
Name: 0, dtype: object

Unnamed: 0,model,model_type,FPU_status,compiler_optimization,cmsis-nn,tflu_mcu_benchmark_mean,relative_speedup
9,LeNet-MNIST_none_tflite-builtins_none_none,W-float32_A-float32,0,-Ofast,cmsis-nn,188815.8139,1.20542798
11,LeNet-MNIST_none_tflite-builtins-INT8_none_dat...,W-int8_A-int8,0,-Ofast,cmsis-nn,16459.5706,13.82805603
1,LeNet-MNIST_none_tflite-builtins_none_none,W-float32_A-float32,0,-Os,cmsis-nn,225235.7582,1.0105139
3,LeNet-MNIST_none_tflite-builtins-INT8_none_dat...,W-int8_A-int8,0,-Os,cmsis-nn,19774.4448,11.51000025
13,LeNet-MNIST_none_tflite-builtins_none_none,W-float32_A-float32,1,-Ofast,cmsis-nn,59508.9206,3.82470161
15,LeNet-MNIST_none_tflite-builtins-INT8_none_dat...,W-int8_A-int8,1,-Ofast,cmsis-nn,16106.0121,14.13160894
5,LeNet-MNIST_none_tflite-builtins_none_none,W-float32_A-float32,1,-Os,cmsis-nn,93143.4285,2.44358478
7,LeNet-MNIST_none_tflite-builtins-INT8_none_dat...,W-int8_A-int8,1,-Os,cmsis-nn,19470.9134,11.68942924
8,LeNet-MNIST_none_tflite-builtins_none_none,W-float32_A-float32,0,-Ofast,none,190194.0494,1.19669288
10,LeNet-MNIST_none_tflite-builtins-INT8_none_dat...,W-int8_A-int8,0,-Ofast,none,65395.9346,3.48039776




min_index 3
max_index 12


MCU                                                   NUCLEO_F767ZI
model                    LeNet-MNIST_none_tflite-builtins_none_none
model_type                                      W-float32_A-float32
FPU_status                                                        0
compiler_optimization                                           -Os
cmsis-nn                                                       none
Name: 12, dtype: object

Unnamed: 0,model,model_type,FPU_status,compiler_optimization,cmsis-nn,tflu_mcu_benchmark_mean,relative_speedup
5,LeNet-MNIST_none_tflite-builtins_none_none,W-float32_A-float32,0,-Ofast,cmsis-nn,105118.1112,1.20262186
7,LeNet-MNIST_none_tflite-builtins-INT8_none_dat...,W-int8_A-int8,0,-Ofast,cmsis-nn,8349.5034,15.14070143
13,LeNet-MNIST_none_tflite-builtins_none_none,W-float32_A-float32,0,-Os,cmsis-nn,124705.4675,1.01372731
15,LeNet-MNIST_none_tflite-builtins-INT8_none_dat...,W-int8_A-int8,0,-Os,cmsis-nn,9893.8328,12.77738776
1,LeNet-MNIST_none_tflite-builtins_none_none,W-float32_A-float32,1,-Ofast,cmsis-nn,19967.1834,6.33125542
3,LeNet-MNIST_none_tflite-builtins-INT8_none_dat...,W-int8_A-int8,1,-Ofast,cmsis-nn,8045.5337,15.71273489
9,LeNet-MNIST_none_tflite-builtins_none_none,W-float32_A-float32,1,-Os,cmsis-nn,41439.5183,3.0506469
11,LeNet-MNIST_none_tflite-builtins-INT8_none_dat...,W-int8_A-int8,1,-Os,cmsis-nn,9410.8728,13.43311516
4,LeNet-MNIST_none_tflite-builtins_none_none,W-float32_A-float32,0,-Ofast,none,104307.814,1.21196422
6,LeNet-MNIST_none_tflite-builtins-INT8_none_dat...,W-int8_A-int8,0,-Ofast,none,18215.7146,6.9400153




min_index 1
max_index 18


MCU                                                          NUCLEO_L496ZG
model                    01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...
model_type                                                   W-int8_A-int8
FPU_status                                                               0
compiler_optimization                                                  -Os
cmsis-nn                                                              none
Name: 18, dtype: object

Unnamed: 0,model,model_type,FPU_status,compiler_optimization,cmsis-nn,tflu_mcu_benchmark_mean_50,relative_speedup
11,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,W-float32_A-float32,0,-Ofast,cmsis-nn,,
7,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,W-int8_A-int8,0,-Ofast,cmsis-nn,2223333.86,11.95696841
23,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,W-float32_A-float32,0,-Os,cmsis-nn,,
19,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,W-int8_A-int8,0,-Os,cmsis-nn,2844980.8,9.34429249
5,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,W-float32_A-float32,1,-Ofast,cmsis-nn,,
1,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,W-int8_A-int8,1,-Ofast,cmsis-nn,2217008.96,11.99108041
17,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,W-float32_A-float32,1,-Os,cmsis-nn,,
13,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,W-int8_A-int8,1,-Os,cmsis-nn,2839520.76,9.36226038
10,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,W-float32_A-float32,0,-Ofast,none,,
6,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,W-int8_A-int8,0,-Ofast,none,12090503.92,2.19877789




min_index 1
max_index 22


MCU                                                           DISCO_F469NI
model                    01d_ResNet20_CIFAR-10_none_tflite-builtins_non...
model_type                                             W-float32_A-float32
FPU_status                                                               0
compiler_optimization                                                  -Os
cmsis-nn                                                              none
Name: 22, dtype: object

Unnamed: 0,model,model_type,FPU_status,compiler_optimization,cmsis-nn,tflu_mcu_benchmark_mean_50,relative_speedup
11,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,W-float32_A-float32,0,-Ofast,cmsis-nn,21002112.26,1.37212437
7,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,W-int8_A-int8,0,-Ofast,cmsis-nn,985545.88,29.24015063
23,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,W-float32_A-float32,0,-Os,cmsis-nn,28631326.5,1.00650279
19,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,W-int8_A-int8,0,-Os,cmsis-nn,1393720.06,20.67668451
5,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,W-float32_A-float32,1,-Ofast,cmsis-nn,6151933.48,4.6843013
1,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,W-int8_A-int8,1,-Ofast,cmsis-nn,984317.6,29.27663793
17,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,W-float32_A-float32,1,-Os,cmsis-nn,14186869.32,2.03128043
13,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,W-int8_A-int8,1,-Os,cmsis-nn,1391340.8,20.71204264
10,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,W-float32_A-float32,0,-Ofast,none,21002183.34,1.37211972
6,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,W-int8_A-int8,0,-Ofast,none,6507424.8,4.42840461




min_index 1
max_index 14


MCU                                                          NUCLEO_F767ZI
model                    01d_ResNet20_CIFAR-10_none_tflite-builtins_non...
model_type                                             W-float32_A-float32
FPU_status                                                               0
compiler_optimization                                                  -Os
cmsis-nn                                                              none
Name: 14, dtype: object

Unnamed: 0,model,model_type,FPU_status,compiler_optimization,cmsis-nn,tflu_mcu_benchmark_mean_50,relative_speedup
7,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,W-float32_A-float32,0,-Ofast,cmsis-nn,11046480.06,1.40912313
5,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,W-int8_A-int8,0,-Ofast,cmsis-nn,451766.0,34.45556009
15,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,W-float32_A-float32,0,-Os,cmsis-nn,15552876.26,1.00083421
13,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,W-int8_A-int8,0,-Os,cmsis-nn,635650.36,24.48807008
3,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,W-float32_A-float32,1,-Ofast,cmsis-nn,1613397.56,9.6478704
1,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,W-int8_A-int8,1,-Ofast,cmsis-nn,449302.14,34.64450572
11,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,W-float32_A-float32,1,-Os,cmsis-nn,6053537.6,2.57136431
9,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,W-int8_A-int8,1,-Os,cmsis-nn,629710.14,24.71907243
6,01d_ResNet20_CIFAR-10_none_tflite-builtins_non...,W-float32_A-float32,0,-Ofast,none,11035578.02,1.4105152
4,01d_ResNet20_CIFAR-10_none_tflite-builtins-INT...,W-int8_A-int8,0,-Ofast,none,1121686.46,13.87718504






---