# 01_Figures

This notebook contains the code for regenerating all plots and figures of the paper.

In [1]:
# Execute the shared setup notebook in this kernel before anything else.
# NOTE(review): the cells below use pd, np, sns, plt, display, linregress and
# get_model_name without importing them, so they are presumably defined in
# 00_Init.ipynb — confirm.
%run '00_Init.ipynb'

---

# Figure 2: Inference Latency


### Load the data

In [2]:
# Load the whole-model benchmark results of both networks for the three boards.
# Each frame is sorted by 'model_type'; the frames are grouped per network in a
# dict keyed by the MCU short name (L4 / F4 / F7).
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling —
# only load result files from a trusted source.
model = 'LeNet-MNIST'
l4_df = pd.read_pickle('data/NUCLEO_L496ZG_LeNet-MNIST_benchmarking+verification_results_2020-07-24.pkl').sort_values(by=['model_type'])
f4_df = pd.read_pickle('data/DISCO_F469NI_LeNet-MNIST_benchmarking+verification_results_2020-07-18.pkl').sort_values(by=['model_type'])
f7_df = pd.read_pickle('data/NUCLEO_F767ZI_LeNet-MNIST_benchmarking+verification_results_2020-07-22.pkl').sort_values(by=['model_type'])

lenet_df = {"L4": l4_df, "F4": f4_df, "F7": f7_df}

model = '01d_ResNet20_CIFAR-10'
l4_df = pd.read_pickle('data/NUCLEO_L496ZG_01d_ResNet20_CIFAR-10_benchmarking_results_2020-07-20.pkl').sort_values(by=['model_type'])
f4_df = pd.read_pickle('data/DISCO_F469NI_01d_ResNet20_CIFAR-10_benchmarking_results_2020-07-19.pkl').sort_values(by=['model_type'])
f7_df = pd.read_pickle('data/NUCLEO_F767ZI_01d_ResNet20_CIFAR-10_benchmarking_results_2020-07-22.pkl').sort_values(by=['model_type'])

resnet_df = {"L4": l4_df, "F4": f4_df, "F7": f7_df}

# Stack all per-board frames into one table, keeping the original order
# (per MCU: ResNet rows first, then LeNet rows). A single pd.concat replaces
# the row-wise DataFrame.append loop, which is deprecated and no longer
# available in pandas 2.0.
super_df = pd.concat(
    [frame for mcu in resnet_df for frame in (resnet_df[mcu], lenet_df[mcu])]
)
# Rows without a 'tflu_mcu_benchmark_mean' value fall back to the
# 'tflu_mcu_benchmark_mean_50' column.
super_df['tflu_mcu_benchmark_mean'] = super_df['tflu_mcu_benchmark_mean'].fillna(super_df['tflu_mcu_benchmark_mean_50'])

In [3]:
# Figure 2: one horizontal bar chart of inference latency per (network, board).
hue = None
# Column -> required value. Every plotted row must match all entries; the
# values are also concatenated into the suffix of the output file name.
filter_dict = {
    'compiler_optimization': '-Ofast',
    #'cmsis-nn': 'none',
    #'FPU_status': 1
}

for dfs in [lenet_df, resnet_df]:
    for mcu in dfs:
        df = dfs[mcu]

        model_name = get_model_name(df['model'].unique()[0])

        # Keep rows whose 'weights' and 'activations' entries are equal and
        # that satisfy every condition in filter_dict.
        mask = (df['weights'] == df['activations'])
        filter_descr = ''
        for key in filter_dict:
            mask = mask & (df[key] == filter_dict[key])
            filter_descr += (str(filter_dict[key]) + '_')

        # Explicit copy: the column assignments below must not operate on a
        # view of the source frame (avoids SettingWithCopyWarning).
        df_filtered = df.loc[mask].copy()
        df_filtered['combined_type'] = df_filtered['model_type'] + '_' + df_filtered['cmsis-nn'] + '_' + df_filtered['FPU_status'].astype(str)

        # let's sort the filtered df so that the order in the plots match
        df_filtered = df_filtered.sort_values(by=['model_type'])
        df_filtered = df_filtered.reset_index()
        # NOTE(review): positional row selection — which configurations remain
        # depends on the row order produced by the sort above; confirm against
        # the data whenever the result files change.
        df_filtered = df_filtered.drop([1,3,6,7])
        df_filtered = df_filtered.sort_values(by=['FPU_status'])

        # Use the '_50' latency column when the result file provides it,
        # otherwise the plain mean column (replaces the try/except KeyError).
        if 'tflu_mcu_benchmark_mean_50' in df_filtered.columns:
            latency_col = 'tflu_mcu_benchmark_mean_50'
        else:
            latency_col = 'tflu_mcu_benchmark_mean'

        # Scale by 1/1000 to match the [ms] axis label below.
        df_filtered[latency_col] = df_filtered[latency_col] / 1000
        fig = sns.catplot(y='combined_type', x=latency_col, kind='bar', data=df_filtered, hue=hue, height=2, aspect=16/9)
        display(df_filtered[['combined_type', latency_col]].drop_duplicates())

        plt.xlabel("Inference Latency [ms]")
        plt.ylabel("Model Type")

        # NOTE(review): these labels are assigned by bar position, not derived
        # from 'combined_type'. Check them against the table displayed above
        # for every board — the row order is data dependent.
        fig.set_yticklabels(['Unoptimized (U)', 'U + FPU', 'Quantized (Q)', 'Q + CMSIS-NN'])

        filename = f'figures/fig2_{mcu}_{model_name}_{filter_descr}'
        print(filename)

        plt.tight_layout()
        # Save before showing: on non-interactive backends plt.show() releases
        # the figure, and a later savefig would write an empty canvas.
        plt.savefig(filename + '.pdf')
        plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,combined_type,tflu_mcu_benchmark_mean
2,W-float32_A-float32_none_0,405.645166
0,W-float32_A-float32_none_1,112.1962348
4,W-int8_A-int8_none_1,120.6695127
5,W-int8_A-int8_cmsis-nn_1,36.391342


figures/fig2_L4_LeNet-MNIST_-Ofast_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,combined_type,tflu_mcu_benchmark_mean
0,W-float32_A-float32_none_0,190.1940494
4,W-int8_A-int8_none_0,65.3959346
5,W-int8_A-int8_cmsis-nn_0,16.4595706
2,W-float32_A-float32_none_1,58.5430699


figures/fig2_F4_LeNet-MNIST_-Ofast_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,combined_type,tflu_mcu_benchmark_mean
2,W-float32_A-float32_none_0,104.307814
0,W-float32_A-float32_none_1,19.5964754
4,W-int8_A-int8_none_1,17.8879866
5,W-int8_A-int8_cmsis-nn_1,8.0455337


figures/fig2_F7_LeNet-MNIST_-Ofast_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,combined_type,tflu_mcu_benchmark_mean_50
0,W-float32_A-float32_cmsis-nn_0,
4,W-int8_A-int8_none_0,12090.50392
5,W-int8_A-int8_cmsis-nn_0,2223.33386
2,W-float32_A-float32_none_1,


figures/fig2_L4_ResNet20_CIFAR-10_-Ofast_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,combined_type,tflu_mcu_benchmark_mean_50
0,W-float32_A-float32_cmsis-nn_0,21002.11226
4,W-int8_A-int8_none_0,6507.4248
5,W-int8_A-int8_cmsis-nn_0,985.54588
2,W-float32_A-float32_none_1,6151.92884


figures/fig2_F4_ResNet20_CIFAR-10_-Ofast_


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,combined_type,tflu_mcu_benchmark_mean_50
2,W-float32_A-float32_none_0,11035.57802
0,W-float32_A-float32_none_1,1613.53754
4,W-int8_A-int8_none_1,1119.74508
5,W-int8_A-int8_cmsis-nn_1,449.30214


figures/fig2_F7_ResNet20_CIFAR-10_-Ofast_


---

# Figure 3: Pareto Frontier

In [4]:
# Aggregated energy/inference table used for the Pareto plot.
# Board identifiers are shortened to the names used in the figures.
di = {"NUCLEO-L496ZG": "L4", "DISCO-F469NI": "F4", "NUCLEO-F767ZI": "F7"}

df = pd.read_pickle('data/A_Aggregated_energy_inference_table.pkl')

# Use '-' instead of '_' in both label columns before mapping the board names.
for column in ('model_type', 'MCU'):
    df[column] = df[column].str.replace('_','-')

# Map long board names to short ones, then expose the old index as a column.
df = df.replace({"MCU": di}).reset_index()

In [5]:
# Figure 3: energy vs. latency scatter per network, with a line connecting the
# fastest and the most energy-efficient configuration.
models = df['model_name'].unique()

for model in models:
    # Rows of this network whose 'weights' and 'activations' entries are equal.
    mask = (df['weights'] == df['activations']) & (df['model_name'] == model)
    df_filtered = df.loc[mask]
    model_name = get_model_name(df_filtered['model'].unique()[0])

    # let's sort the filtered df so that the order in the plots match
    # (.copy() so the scaling below never touches the shared `df`)
    df_filtered = df_filtered.sort_values(by=['model_type', 'MCU']).copy()
    # Scale by 1000 to match the [mJ] / [ms] axis labels below.
    df_filtered['energy_mean'] = df_filtered['energy_mean'] * 1000
    df_filtered['latency_mean'] = df_filtered['latency_mean'] * 1000

    # Corner points of the drawn front: lowest-latency row and lowest-energy row.
    # NOTE(review): only these two extremes are used; any other non-dominated
    # configuration lying between them would not be part of the drawn line.
    x_index = df_filtered['latency_mean'].idxmin()
    y_index = df_filtered['energy_mean'].idxmin()

    # Three points forming an L-shaped step from the fastest to the most
    # efficient configuration.
    x_values = df_filtered.loc[[x_index,y_index,y_index],'latency_mean']
    y_values = df_filtered.loc[[x_index,x_index,y_index],'energy_mean']
    print(x_values.iloc[0:2])
    print(y_values)

    fig, ax = plt.subplots(figsize=(4,3))

    ax.plot(x_values, y_values, c='r', ls='-', label='Pareto Front', linewidth=2)

    sns.scatterplot(ax=ax, x="latency_mean", y="energy_mean", hue="MCU", data=df_filtered,
                    hue_order=["L4", "F4", "F7"], legend='brief', s=50, style='MCU')
    display(df_filtered[['model_type','latency_mean', 'energy_mean']])

    plt.legend()

    plt.xlabel("Inference Latency [ms]")
    plt.ylabel("Inference Energy Consumption [mJ]")

    filename = f'figures/fig3_scatter_pareto_{model_name}'
    print(filename)

    plt.tight_layout()
    # Save before showing: on non-interactive backends plt.show() releases the
    # figure, and a later savefig would write an empty canvas.
    plt.savefig(filename + '.pdf')
    plt.show()

5      448.09717988
14    2209.18812500
Name: latency_mean, dtype: float64
5     193.27060836
5     193.27060836
14     98.19830809
Name: energy_mean, dtype: float64


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,latency_mean,energy_mean
15,W-float32-A-float32,6131.24085938,1244.3693555
20,W-float32-A-float32,6131.235,1283.69909146
0,W-float32-A-float32,1607.76025979,633.66769753
16,W-float32-A-float32,1607.24151786,638.23991973
10,W-int8-A-int8,983.23891715,223.95316562
21,W-int8-A-int8,6486.3202227,1279.32453491
3,W-int8-A-int8,1113.99444731,584.32011147
5,W-int8-A-int8,448.09717988,193.27060836
14,W-int8-A-int8,2209.188125,98.19830809
17,W-int8-A-int8,12041.714375,550.70729805


figures/fig3_scatter_pareto_ResNet20_CIFAR-10
19     8.08038651
11    36.46437500
Name: latency_mean, dtype: float64
19    3.21706050
19    3.21706050
11    1.58983832
Name: energy_mean, dtype: float64


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0,model_type,latency_mean,energy_mean
1,W-float32-A-float32,58.390625,11.94871866
8,W-float32-A-float32,59.35484375,12.78852949
7,W-float32-A-float32,19.95101351,8.38124693
18,W-float32-A-float32,19.58125,8.18971318
4,W-float32-A-float32,113.01296875,5.10258974
12,W-float32-A-float32,112.06140625,5.09570572
6,W-int8-A-int8,16.08033088,3.74759469
13,W-int8-A-int8,65.46621622,14.65044108
2,W-int8-A-int8,17.91729798,8.43169922
19,W-int8-A-int8,8.08038651,3.2170605


figures/fig3_scatter_pareto_LeNet-MNIST


---

# Figure 4: Non-linearity Dense Layer: Ops and Latency

In [6]:
# Figure 4 data preparation: per-layer results of the dense-layer benchmark,
# annotated with the unit count parsed from the model name and with analytic
# FLOP / MACC counts per layer.
filename = 'data/F-combined_no-cmsis_N_01ec_DenseBenchmark_varying-input_U-440_layer_results_2021-03-31.pkl'

df = pd.read_pickle(filename)
# Drop rows that carry an error entry. Result files without an 'error' column
# are used unchanged (explicit check instead of a bare `except: pass`, which
# would also hide unrelated failures).
if 'error' in df.columns:
    df = df.drop((df[df['error'].notna()]).index)

df['model_type'] = df['model_type'].str.replace('_','-')
df['MCU'] = df['MCU'].str.replace('_','-')

di = {"NUCLEO-L496ZG": "L4", "DISCO-F469NI": "F4", "NUCLEO-F767ZI": "F7"}

df = df.replace({"MCU": di})

# Unit count encoded in the model name as '_U-' followed by three digits
# (e.g. '_U-440'). One vectorised extract replaces 500 separate substring
# scans; rows without the pattern stay NaN, hence the float column.
df['dense_units'] = df['model'].str.extract(r'_U-(\d{3})', expand=False).astype(float)

mask = (df['layer'].str.contains('2_FULLY'))
df.loc[mask, 'layer_FLOPs'] = 2 * (10*10) * df['dense_units']
df.loc[mask, 'layer_MACCs'] = df['dense_units'] * (10*10 + 1)


# of interest -> varying input
mask = (df['layer'].str.contains('3_FULLY'))
df.loc[mask, 'layer_FLOPs'] = 2 * df['dense_units'] * 450
df.loc[mask, 'layer_MACCs'] = 450 * (df['dense_units']  + 1)


mask = (df['layer'].str.contains('4_FULLY'))
df.loc[mask, 'layer_FLOPs'] = 2 * 450 * df['dense_units']
df.loc[mask, 'layer_MACCs'] = df['dense_units'] * (450  + 1)


# of interest -> varying input
mask = (df['layer'].str.contains('5_FULLY'))
df.loc[mask, 'layer_FLOPs'] = 2* df['dense_units'] * 450
df.loc[mask, 'layer_MACCs'] = 450 * (df['dense_units']  + 1)


mask = (df['layer'].str.contains('6_FULLY'))
df.loc[mask, 'layer_FLOPs'] = 2 * 450 * 10
df.loc[mask, 'layer_MACCs'] = 10 * (450  + 1)


# Every layer that is not fully connected gets a fixed count of 100.
# NOTE(review): this looks like a placeholder value — confirm.
mask = ~(df['layer'].str.contains('FULLY'))
df.loc[mask, 'layer_FLOPs'] = 100
df.loc[mask, 'layer_MACCs'] = 100


# Per model: total FLOPs over all its rows and each row's share of that total.
models = df['model'].unique()

for model in models:
    mask = (df['model'] == model)
    df_filtered = df.loc[mask]
    df.loc[mask, 'layer_FLOPs_sum'] = df_filtered['layer_FLOPs'].sum()
    df.loc[mask, 'layer_FLOPs_relative'] = df_filtered['layer_FLOPs'] / df.loc[mask,'layer_FLOPs_sum']

In [7]:
# Classify every row by the divisibility of its unit count:
# multiples of 4 -> 'modulo 4', other even counts -> 'even', everything else
# (including rows without a unit count) -> 'odd'.
units = df['dense_units']
df['dense_type'] = np.select(
    [units % 4 == 0, units % 2 == 0],
    ['modulo 4', 'even'],
    default='odd',
)

In [8]:
# Figure 4: latency of the '3_FULLY' layer (CMSIS-NN kernels) against its MACC
# count, one figure per board, points grouped by divisibility of the unit count.
models = df['model'].unique()
# The first model name is only used to build the output file name.
model = models[0]

mcus = df['MCU'].unique()

for mcu in mcus:
    mask = (df['layer'].str.contains('3_FULLY'))
    mask &= (df['cmsis-nn'] == 'cmsis-nn')
    # Restrict to the board named in the output file; without this every
    # per-board figure would contain the measurements of all boards.
    mask &= (df['MCU'] == mcu)

    # Explicit copy so the scaling below never writes through to `df`.
    df_filtered = df.loc[mask].copy()

    # Scale by 1/1000 to match the [ms] axis label below.
    df_filtered['layer_latency_mean'] = df_filtered['layer_latency_mean'] / 1000

    # linewidth=0 hides the connecting line, leaving only the markers.
    g = sns.relplot(x='layer_MACCs', y='layer_latency_mean', data=df_filtered, aspect=4/3, height=3,
                    hue='dense_type', style='dense_type', legend=None, markers=True, ci=None, kind='line', dashes=False, alpha=0.9, linewidth=0)

    # Fixed view window for the plotted range.
    g.axes[0][0].set_ylim(8.5, 12.5)
    g.axes[0][0].set_xlim(201500, 216300)

    # Show the x ticks in thousands.
    ticks = g.axes[0][0].get_xticks()
    xlabels = ['{:,.0f}'.format(x) + 'K' for x in ticks / 1000]
    g.set_xticklabels(xlabels)

    plt.ylabel("Layer Latency [ms]")
    plt.xlabel("MACC Operations per Layer [#]")

    # NOTE(review): legend entries are positional; they assume the groups are
    # drawn in the order 'modulo 4', 'odd', 'even' — verify against the data.
    plt.legend(['Divisible by 4', 'Odd', 'Even'], loc='upper left', title='Input Length')

    filename = f'figures/fig4_dense-units_latency_{mcu}_{model}'

    print(filename)
    plt.tight_layout()
    # Save before showing: on non-interactive backends plt.show() releases the
    # figure, and a later savefig would write an empty canvas.
    plt.savefig(filename + '.pdf')
    plt.show()

unknown model name


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

figures/fig4_dense-units_latency_L4_01ec_DenseBenchmark_varying-input_U-440_Q-full


---

# Figure 5: Non-linearity Conv Layer: Ops and Latency

In [9]:
# Figure 5 data preparation: per-layer results of the convolution benchmark,
# annotated with kernel size / filter count parsed from the model name and
# with analytic FLOP / MACC counts per layer.
filename = 'data/F-combined_no-cmsis_N_01dg_ConvBenchmark_SingleConv_F-100_K-1_layer_results_2021-04-01.pkl'

df = pd.read_pickle(filename)
# Drop rows that carry an error entry. Result files without an 'error' column
# are used unchanged (explicit check instead of a bare `except: pass`, which
# would also hide unrelated failures).
if 'error' in df.columns:
    df = df.drop((df[df['error'].notna()]).index)

df['model_type'] = df['model_type'].str.replace('_','-')
df['MCU'] = df['MCU'].str.replace('_','-')

di = {"NUCLEO-L496ZG": "L4", "DISCO-F469NI": "F4", "NUCLEO-F767ZI": "F7"}

df = df.replace({"MCU": di})

# Kernel size and filter count are encoded in the model name as '_K-<n>_' and
# '_F-<n>_'. One vectorised extract per field replaces ~400 substring scans;
# rows without the pattern stay NaN, hence the float columns.
df['kernel'] = df['model'].str.extract(r'_K-(\d+)_', expand=False).astype(float)
df['filters'] = df['model'].str.extract(r'_F-(\d+)_', expand=False).astype(float)

# first layer has only a single input channel
mask = (df['layer'].str.contains('1_CONV'))
df.loc[mask, 'layer_FLOPs'] = 32*32 * 1 *  (2*df['kernel'] * df['kernel'] + 1) * df['filters']
df.loc[mask, 'layer_MACCs'] = 32*32 * 1 *  (df['kernel'] * df['kernel'] + 1) * df['filters']

# Remaining conv layers: 'filters' acts as the input-channel count.
# NOTE(review): the constant 60 is presumably the layer's output-filter
# count — confirm against the model definition.
mask = (df['layer'].str.contains('CONV') & ~(df['layer'].str.contains('1_CONV')))
df.loc[mask, 'layer_FLOPs'] = 32*32 * df['filters'] *  (2*df['kernel'] * df['kernel'] + 1) * 60
df.loc[mask, 'layer_MACCs'] = 32*32 * df['filters'] *  (df['kernel'] * df['kernel'] + 1) * 60

mask = (df['layer'].str.contains('FULLY'))
df.loc[mask, 'layer_FLOPs'] = 2* (32*32 * 60) * 10
df.loc[mask, 'layer_MACCs'] = (32*32 * 60 + 1) * 10

# All other layers get a fixed count of 10.
# NOTE(review): this looks like a placeholder value — confirm.
mask = ~(df['layer'].str.contains('FULLY')) & ~(df['layer'].str.contains('CONV'))
df.loc[mask, 'layer_FLOPs'] = 10
df.loc[mask, 'layer_MACCs'] = 10

# Per model: total FLOPs over all its rows and each row's share of that total.
models = df['model'].unique()

for model in models:
    mask = (df['model'] == model)
    df_filtered = df.loc[mask]

    df.loc[mask, 'layer_FLOPs_sum'] = df_filtered['layer_FLOPs'].sum()
    df.loc[mask, 'layer_FLOPs_relative'] = df_filtered['layer_FLOPs'] / df.loc[mask,'layer_FLOPs_sum']

# Combined label, e.g. 'F-100.0_K-3.0' (the numeric columns are floats).
df['filters_kernel'] = 'F-' + df['filters'].astype(str) + '_K-' + df['kernel'].astype(str)

# Classify rows by divisibility of the filter count; multiples of 4 override
# the plain 'even' label.
df['filter_type'] = np.where(df['filters'] % 2, 'odd', 'even')
mask = ~(df['filters'] % 4).astype(bool)
df.loc[mask, 'filter_type'] = 'modulo 4'

In [10]:
# Preview of the per-layer latency next to the computed MACC count and kernel size.
df.loc[:, ['layer', 'layer_latency_mean', 'layer_MACCs', 'kernel']]

Unnamed: 0,layer,layer_latency_mean,layer_MACCs,kernel
0,0_QUANTIZE,573.6,10.0,1.0
1,1_CONV_2D,203139.9,204800.0,1.0
2,2_CONV_2D,1833198.3,12288000.0,1.0
3,3_RESHAPE,8453.2,10.0,1.0
4,4_FULLY_CONNECTED,88364.9,614410.0,1.0
...,...,...,...,...
622,2_CONV_2D,2622974.9,73728000.0,3.0
623,3_RESHAPE,8453.2,10.0,3.0
624,4_FULLY_CONNECTED,24772.5,614410.0,3.0
625,5_SOFTMAX,196.5,10.0,3.0


In [11]:

# Figure 5: latency of the second and later conv layers (3x3 kernel, CMSIS-NN)
# against their MACC count, one figure per board, with a secondary x axis
# showing the number of input channels.
models = df['model'].unique()
# The first model name is only used to build the output file name.
model = models[0]

mcus = df['MCU'].unique()

for mcu in mcus:
    mask = (df['layer'].str.contains('CONV') & ~(df['layer'].str.contains('1_CONV')))
    mask &= df['kernel'] == 3
    mask &= df['cmsis-nn'] == 'cmsis-nn'
    # Restrict to the board named in the output file; without this every
    # per-board figure would contain the measurements of all boards.
    mask &= df['MCU'] == mcu

    # let's sort the filtered df so that the order in the plots match
    # (.copy() so the scaling below never writes through to `df`)
    df_filtered = df.loc[mask].sort_values(by=['model_type', 'MCU']).copy()

    # Two factors of 1/1000 to match the [s] axis label below.
    df_filtered['layer_latency_mean'] = df_filtered['layer_latency_mean'] / 1000 / 1000

    g = sns.relplot(x="layer_MACCs", y="layer_latency_mean", data=df_filtered, height=3, aspect=4/3,
                    hue='filter_type', style="filter_type", legend=None, markers=True, ci=None, kind='line', dashes=False, alpha=0.9, linewidth=0.2)

    # NOTE(review): the regression uses layer_FLOPs while the plot's x axis is
    # layer_MACCs — confirm this is intended.
    display(linregress(df_filtered['layer_FLOPs'], df_filtered['layer_latency_mean']))

    display(df_filtered[['filters', 'layer_MACCs','layer_latency_mean']])

    # Fixed view window; ticks are shown in millions.
    g.axes[0][0].set_xlim(61440000, 73728000)
    ticks = g.axes[0][0].get_xticks()
    xlabels = ['{:,.0f}'.format(x) + 'M' for x in ticks / 1e6]
    g.set_xticklabels(xlabels)

    plt.xlabel("MACC Operations per Layer [#]")
    plt.ylabel("Layer Latency [s]")

    # NOTE(review): legend entries are positional; they assume the groups are
    # drawn in the order 'modulo 4', 'odd', 'even' — verify against the data.
    plt.legend(['Divisible by 4', 'Odd', 'Even'], loc='lower right', title="No. of Input Channels")

    # Secondary top axis labelled with the input-channel count; its limits are
    # hard-coded to 100..120 to line up with the MACC window set above.
    ax2 = g.axes[0][0].twiny()
    ax2.set_xlabel('No. of Input Channels')
    ax2.set_xticks(df_filtered['filters'].astype(int)[::2])
    ax2.set_xlim(100,120)
    ax2.grid(False)

    filename = f'figures/fig5_conv-regression_{mcu}_{model}'

    print(filename)

    plt.tight_layout()
    # Save before showing: on non-interactive backends plt.show() releases the
    # figure, and a later savefig would write an empty canvas.
    plt.savefig(filename + '.pdf')
    plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

LinregressResult(slope=1.9044774133487125e-08, intercept=0.08485098571428651, rvalue=0.7951587764696911, pvalue=1.6500038342802706e-05, stderr=3.3320022277107366e-09)

Unnamed: 0,filters,layer_MACCs,layer_latency_mean
322,100.0,61440000.0,2.1970924
337,101.0,62054400.0,2.4259346
352,102.0,62668800.0,2.2900132
367,103.0,63283200.0,2.4734752
382,104.0,63897600.0,2.282769
397,105.0,64512000.0,2.5190012
412,106.0,65126400.0,2.3767302
427,107.0,65740800.0,2.566603
442,108.0,66355200.0,2.367143
457,109.0,66969600.0,2.6123049


figures/fig5_conv-regression_L4_01dg_ConvBenchmark_SingleConv_F-100_K-1_Q-full


---

# Appendix

## Figure 6: Inference Latency and Energy Consumption 

In [12]:
# Reload the aggregated energy/inference table for the appendix figures.
di = {"NUCLEO-L496ZG": "L4", "DISCO-F469NI": "F4", "NUCLEO-F767ZI": "F7"}

df = pd.read_pickle('data/A_Aggregated_energy_inference_table.pkl')

# Use '-' instead of '_' in both label columns before mapping the board names.
for column in ('model_type', 'MCU'):
    df[column] = df[column].str.replace('_','-')

# Shorten the board names, then expose the old index as a regular column.
df = df.replace({"MCU": di}).reset_index()

### absolute

In [13]:
# Appendix Fig. 6 (absolute): energy against latency with one regression line
# per board.
model_name = get_model_name(df['model'].unique()[0])

# Sorted working copy, so the order in the plots matches and `df` keeps its
# original energy values.
df_filtered = df.copy().sort_values(by=['model_type'])

# Scale by 1000 to match the [mJ] axis label below.
df_filtered['energy_mean'] = df_filtered['energy_mean'] * 1000

fig = sns.lmplot(
    data=df_filtered,
    x="latency_mean",
    y="energy_mean",
    hue="MCU",
    hue_order=["L4", "F4", "F7"],
    markers=['s', 'o', 'X'],
    height=3,
    aspect=4/3,
    legend='brief',
    legend_out=False,
    scatter_kws={'s': 30},
    line_kws={'linewidth': 1},
)

plt.xlabel("Inference Latency [s]")
plt.ylabel("Inference Energy Consumption [mJ]")

filename = 'figures/appendix_fig6_energy-regression-absolute'
print(filename)

plt.tight_layout()
plt.savefig(filename + '.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

figures/appendix_fig6_energy-regression-absolute


### relative

In [14]:
# Per board: report the linear fit of energy over latency, then add columns
# holding both quantities normalised to the board's own maximum.
mcus = df['MCU'].unique()

for mcu in mcus:
    on_board = df['MCU'].eq(mcu)
    df_filtered = df.loc[on_board]

    print(mcu)
    display(linregress(df_filtered['latency_mean'], df_filtered['energy_mean']))
    print('---')

    # normalize (latency first, then energy)
    for metric in ('latency_mean', 'energy_mean'):
        values = df.loc[on_board, metric]
        df.loc[on_board, metric + '_relative'] = values / values.max()

F7


LinregressResult(slope=0.4144181931110935, intercept=0.00885585463827454, rvalue=0.9873363993199276, pvalue=5.028954036852878e-06, stderr=0.02718393585988921)

---
F4


LinregressResult(slope=0.20224916078795876, intercept=0.005423408653982609, rvalue=0.9993979940849389, pvalue=5.451878612074809e-10, stderr=0.002866306541853928)

---
L4


LinregressResult(slope=0.04574417604812563, intercept=-0.0005410756031233172, rvalue=0.9999865016892436, pvalue=2.733053601873694e-10, stderr=0.00011884062287667882)

---


In [15]:
# Appendix Fig. 6 (normalised): energy against latency, both relative to each
# board's maximum, with one regression line per board.
# Requires the 'latency_mean_relative' / 'energy_mean_relative' columns that the
# per-board normalisation loop adds to `df`.

# let's sort the filtered df so that the order in the plots match
# (sort_values returns a new frame, so `df` itself is left untouched)
df_filtered = df.sort_values(by=['model_type'])

fig = sns.lmplot(x="latency_mean_relative", y="energy_mean_relative", hue="MCU", data=df_filtered, height=4, aspect=16/9,  hue_order=["L4", "F4", "F7"])

# Fit across all boards on the normalised values.
display(linregress(df_filtered['latency_mean_relative'], df_filtered['energy_mean_relative']))

# The plotted values are ratios to the per-board maximum, so the axes carry no
# physical unit (previously mislabelled as [s] and [mJ]).
plt.xlabel("Inference Latency Relative")
plt.ylabel("Inference Energy Consumption Relative")

filename = 'figures/appendix_fig6_energy-regression-normalized'

print(filename)
plt.legend()

plt.tight_layout()
plt.savefig(filename + '.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

LinregressResult(slope=1.0289930486249046, intercept=0.005785947684707637, rvalue=0.9946116959441132, pvalue=3.641123440602983e-21, stderr=0.02398275710772669)

figures/appendix_fig6_energy-regression-normalized


## Figure 7: Inference Latency and Energy Consumption per Layer

In [16]:
# Aggregated per-layer energy/inference table for Appendix Fig. 7.
di = {"NUCLEO-L496ZG": "L4", "DISCO-F469NI": "F4", "NUCLEO-F767ZI": "F7"}

df = pd.read_pickle('data/A_Aggregated_energy_inference_layer_table.pkl')

# Use '-' instead of '_' in both label columns before mapping the board names.
for column in ('model_type', 'MCU'):
    df[column] = df[column].str.replace('_','-')

# Shorten the board names to the labels used in the figures.
df = df.replace({"MCU": di})

### absolute

In [17]:
# Appendix Fig. 7 (absolute): per-layer energy against per-layer latency,
# regression across all models and layers with one line per board.

# Sorted working copy, so the order in the plots matches and `df` keeps its
# original energy values.
df_filtered = df.copy().sort_values(by=['model_type','MCU'])

# Scale by 1000 to match the [mJ] axis label below.
df_filtered['layer_energy_mean'] = df_filtered['layer_energy_mean'] * 1000

fig = sns.lmplot(
    data=df_filtered,
    x="layer_latency_mean",
    y="layer_energy_mean",
    hue="MCU",
    hue_order=["L4", "F4", "F7"],
    markers=['s', 'o', 'X'],
    height=3,
    aspect=4/3,
    legend='brief',
    legend_out=False,
)

plt.xlabel("Layer Latency [s]")
plt.ylabel("Layer Energy Consumption [mJ]")

filename = 'figures/appendix_fig7_layer-energy-regression_absolute'
print(filename)

plt.tight_layout()
plt.savefig(filename + '.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

figures/appendix_fig7_layer-energy-regression_absolute


In [18]:
# Report the linear fit of layer energy over layer latency for each board.
mcus = df['MCU'].unique()

for mcu in mcus:
    df_filtered = df.loc[df['MCU'].eq(mcu)]
    print(mcu)
    board_fit = linregress(df_filtered['layer_latency_mean'], df_filtered['layer_energy_mean'])
    display(board_fit)
    print('---')


F7


LinregressResult(slope=0.4150492639175373, intercept=0.00038512430903367423, rvalue=0.9875835802128052, pvalue=8.287064035076815e-142, stderr=0.005005091910049231)

---
F4


LinregressResult(slope=0.2020757659415724, intercept=0.000265817138543456, rvalue=0.9994048527800455, pvalue=2.2489780068932132e-256, stderr=0.0005287626020967181)

---
L4


LinregressResult(slope=0.04578387192945581, intercept=-3.477711155989299e-05, rvalue=0.9999563670329774, pvalue=4.890416960901635e-221, stderr=4.115638522577219e-05)

---


### relative

In [19]:
# Appendix Fig. 7 (relative): relative layer energy against relative layer
# latency with one regression line per board.
# Same steps as before, written as one chain on a copy of `df`: sort by model
# type, scale the absolute layer energy by 1000, then sort by model type and board.
df_filtered = (
    df.copy()
    .sort_values(by=['model_type'])
    .assign(layer_energy_mean=lambda frame: frame['layer_energy_mean'] * 1000)
    .sort_values(by=['model_type','MCU'])
)

fig = sns.lmplot(
    data=df_filtered,
    x="layer_latency_relativ",
    y="layer_energy_relativ",
    hue="MCU",
    hue_order=["L4", "F4", "F7"],
    height=4,
    aspect=16/9,
)

plt.xlabel("Layer Inference Latency Relative")
plt.ylabel("Layer Energy Consumption Relative")

# Fit across all boards on the relative values.
display(linregress(df_filtered['layer_latency_relativ'], df_filtered['layer_energy_relativ']))

filename = 'figures/appendix_fig7_layer-energy-regression_relative'
print(filename)

plt.legend()
plt.tight_layout()
plt.savefig(filename + '.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

LinregressResult(slope=1.0011743960055124, intercept=-5.592361931011647e-05, rvalue=0.9995127472072092, pvalue=0.0, stderr=0.0014577455974796579)

figures/appendix_fig7_layer-energy-regression_relative


## Figure 8: Non-linearity Depth-wise Conv Layer: Latency and Ops

In [20]:
# Figure 8 data preparation: per-layer results of the depth-wise convolution
# benchmark, annotated with kernel size / filter count parsed from the model
# name and with analytic FLOP / MACC counts per layer.
filename = 'data/F-combined_no-cmsis_N_01f_Depthwise-Conv_f-100_K-1_layer_results_2021-03-23.pkl'

df = pd.read_pickle(filename)
# Drop rows that carry an error entry. Result files without an 'error' column
# are used unchanged (explicit check instead of a bare `except: pass`, which
# would also hide unrelated failures).
if 'error' in df.columns:
    df = df.drop((df[df['error'].notna()]).index)

df['model_type'] = df['model_type'].str.replace('_','-')
df['MCU'] = df['MCU'].str.replace('_','-')

di = {"NUCLEO-L496ZG": "L4", "DISCO-F469NI": "F4", "NUCLEO-F767ZI": "F7"}

df = df.replace({"MCU": di})

# Kernel size and filter count are encoded in the model name as '_K-<n>_' and
# '_f-<n>_' (lower-case f in this benchmark). One vectorised extract per field
# replaces the repeated substring scans; rows without the pattern stay NaN,
# hence the float columns.
df['kernel'] = df['model'].str.extract(r'_K-(\d+)_', expand=False).astype(float)
df['filters'] = df['model'].str.extract(r'_f-(\d+)_', expand=False).astype(float)

# first layer has only a single input channel
mask = (df['layer'].str.contains('1_CONV'))
df.loc[mask, 'layer_FLOPs'] = 24*24 * 1 *  (2*df['kernel'] * df['kernel'] + 1) * df['filters']
df.loc[mask, 'layer_MACCs'] = 24*24 * 1 *  (df['kernel'] * df['kernel'] + 1) * df['filters']

# Depth-wise layer: the count scales with the filter count only.
mask = (df['layer'].str.contains('2_DEPTHWISE_CONV_2D'))
df.loc[mask, 'layer_FLOPs'] = 24*24 * df['filters'] *  (2*df['kernel'] * df['kernel'] + 1)
df.loc[mask, 'layer_MACCs'] = 24*24 * df['filters'] *  (df['kernel'] * df['kernel'] + 1)

mask = (df['layer'].str.contains('FULLY'))
df.loc[mask, 'layer_FLOPs'] = 2* (24*24 * df['filters']) * 10
df.loc[mask, 'layer_MACCs'] = (24*24* df['filters'] + 1) * 10

# All other layers get a fixed count of 10.
# NOTE(review): this looks like a placeholder value — confirm.
mask = ~(df['layer'].str.contains('FULLY')) & ~(df['layer'].str.contains('CONV'))
df.loc[mask, 'layer_FLOPs'] = 10
df.loc[mask, 'layer_MACCs'] = 10

# Per model: total FLOPs over all its rows and each row's share of that total.
models = df['model'].unique()

for model in models:
    mask = (df['model'] == model)
    df_filtered = df.loc[mask]

    df.loc[mask, 'layer_FLOPs_sum'] = df_filtered['layer_FLOPs'].sum()
    df.loc[mask, 'layer_FLOPs_relative'] = df_filtered['layer_FLOPs'] / df.loc[mask,'layer_FLOPs_sum']

# Combined label, e.g. 'F-100.0_K-3.0' (the numeric columns are floats).
df['filters_kernel'] = 'F-' + df['filters'].astype(str) + '_K-' + df['kernel'].astype(str)

# Classify rows by divisibility of the filter count; multiples of 4 override
# the plain 'even' label.
df['filter_type'] = np.where(df['filters'] % 2, 'odd', 'even')

mask = ~(df['filters'] % 4).astype(bool)
df.loc[mask, 'filter_type'] = 'modulo 4'

In [21]:
# Appendix Fig. 8: latency of the depth-wise conv layer (3x3 kernel) against
# its MACC count, one figure per board.
models = df['model'].unique()
# The first model name is only used to build the output file name.
model = models[0]

# Iterate over the boards present in this data set. `mcu` was previously read
# in the file name without being defined here, i.e. it silently relied on a
# value left over from an earlier cell.
mcus = df['MCU'].unique()

for mcu in mcus:
    mask = (df['layer'].str.contains('DEPTHWISE'))
    mask &= (df['kernel'] == 3)
    # Restrict to the board named in the output file.
    mask &= (df['MCU'] == mcu)

    # let's sort the filtered df so that the order in the plots match
    # (.copy() so the scaling below never writes through to `df`)
    df_filtered = df.loc[mask].sort_values(by=['model_type', 'MCU']).copy()

    # Two factors of 1/1000 to match the [s] axis label below.
    df_filtered['layer_latency_mean'] = df_filtered['layer_latency_mean'] / 1000 / 1000

    display(df_filtered[['layer', 'kernel', 'filters', 'layer_MACCs', 'layer_FLOPs', 'layer_latency_mean']])
    g = sns.relplot(x="layer_MACCs", y="layer_latency_mean", data=df_filtered, height=3, aspect=4/3,
                    hue='filter_type', style="filter_type", legend=None, markers=True, ci=None, kind='line', dashes=False, alpha=0.9, linewidth=0.2)

    # NOTE(review): the regression uses layer_FLOPs while the plot's x axis is
    # layer_MACCs — confirm this is intended.
    display(linregress(df_filtered['layer_FLOPs'], df_filtered['layer_latency_mean']))

    # Show the x ticks in thousands.
    ticks = g.axes[0][0].get_xticks()
    xlabels = ['{:,.0f}'.format(x) + 'K' for x in ticks / 1e3]
    g.set_xticklabels(xlabels)

    plt.xlabel("MACC Operations per Layer [#]")
    plt.ylabel("Layer Latency [s]")

    filename = f'figures/appendix_fig8_DW-conv-regression_{mcu}_{model}'

    print(filename)

    # NOTE(review): legend entries are positional; they assume the groups are
    # drawn in the order 'modulo 4', 'odd', 'even' — verify against the data.
    plt.legend(['Divisible by 4', 'Odd', 'Even'], loc='lower right', title="No. of Input Channels")

    plt.tight_layout()
    # Save before showing: on non-interactive backends plt.show() releases the
    # figure, and a later savefig would write an empty canvas.
    plt.savefig(filename + '.pdf')
    plt.show()

Unnamed: 0,layer,kernel,filters,layer_MACCs,layer_FLOPs,layer_latency_mean
9,2_DEPTHWISE_CONV_2D,3.0,100.0,576000.0,1094400.0,0.150383
30,2_DEPTHWISE_CONV_2D,3.0,101.0,581760.0,1105344.0,0.1624133
51,2_DEPTHWISE_CONV_2D,3.0,102.0,587520.0,1116288.0,0.1579335
72,2_DEPTHWISE_CONV_2D,3.0,103.0,593280.0,1127232.0,0.1661033
93,2_DEPTHWISE_CONV_2D,3.0,104.0,599040.0,1138176.0,0.156046
114,2_DEPTHWISE_CONV_2D,3.0,105.0,604800.0,1149120.0,0.1683572
135,2_DEPTHWISE_CONV_2D,3.0,106.0,610560.0,1160064.0,0.163715
156,2_DEPTHWISE_CONV_2D,3.0,107.0,616320.0,1171008.0,0.1720568
177,2_DEPTHWISE_CONV_2D,3.0,108.0,622080.0,1181952.0,0.1617131
198,2_DEPTHWISE_CONV_2D,3.0,109.0,627840.0,1192896.0,0.174636


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

LinregressResult(slope=1.3440111880078986e-07, intercept=0.009559404761904727, rvalue=0.8678696572108077, pvalue=3.4606695067437606e-07, stderr=1.7650058260093508e-08)

figures/appendix_fig8_DW-conv-regression_L4_01f_Depthwise-Conv_f-100_K-1_Q-full


## Figure 9: Non-linearity Dense Layer: Input Length/Units and Latency

In [22]:
# Figure 9 data preparation: per-layer results of the dense-layer benchmark,
# annotated with the unit count parsed from the model name.
filename = 'data/F-combined_no-cmsis_N_01ec_DenseBenchmark_varying-input_U-440_layer_results_2021-03-31.pkl'

df = pd.read_pickle(filename)
# Drop rows that carry an error entry. Result files without an 'error' column
# are used unchanged (explicit check instead of a bare `except: pass`, which
# would also hide unrelated failures).
if 'error' in df.columns:
    df = df.drop((df[df['error'].notna()]).index)

df['model_type'] = df['model_type'].str.replace('_','-')
df['MCU'] = df['MCU'].str.replace('_','-')

di = {"NUCLEO-L496ZG": "L4", "DISCO-F469NI": "F4", "NUCLEO-F767ZI": "F7"}

df = df.replace({"MCU": di})

# Unit count encoded in the model name as '_U-' followed by three digits
# (e.g. '_U-440'). One vectorised extract replaces 500 separate substring
# scans; rows without the pattern stay NaN, hence the float column.
df['dense_units'] = df['model'].str.extract(r'_U-(\d{3})', expand=False).astype(float)

In [23]:
# Appendix Fig. 9: latency of the '3_FULLY' layer (CMSIS-NN kernels) for unit
# counts 460..490, one bar chart per board.
models = df['model'].unique()
# The first model name is only used to build the output file name.
model = models[0]

mcus = df['MCU'].unique()

for mcu in mcus:
    mask = (df['layer'].str.contains('3_FULLY'))
    mask &= df['cmsis-nn'] == 'cmsis-nn'
    mask &= df['dense_units'] > 459
    mask &= df['dense_units'] < 491
    # Restrict to the board named in the output file; without this every
    # per-board figure would contain the measurements of all boards.
    mask &= df['MCU'] == mcu

    # Explicit copy: the column assignments below must not operate on a view
    # of `df` (avoids SettingWithCopyWarning).
    df_filtered = df.loc[mask].copy()

    # Integer categories give one bar per unit count with clean tick labels.
    df_filtered['dense_units'] = df_filtered['dense_units'].astype(int).astype("category")

    # Scale by 1/1000 to match the [ms] axis label below.
    df_filtered['layer_latency_mean'] = df_filtered['layer_latency_mean'] / 1000

    fig = sns.catplot(y='dense_units', x='layer_latency_mean', kind='bar', aspect=0.7, height=3.8, data=df_filtered)

    plt.xlabel("Layer Latency [ms]")
    plt.ylabel("Input Length")

    filename = f'figures/appendix_fig9_dense-units-latency_{mcu}_{model}'

    print(filename)

    plt.tight_layout()
    # Save before showing: on non-interactive backends plt.show() releases the
    # figure, and a later savefig would write an empty canvas.
    plt.savefig(filename + '.pdf')
    plt.show()

unknown model name


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

figures/appendix_fig9_dense-units-latency_L4_01ec_DenseBenchmark_varying-input_U-440_Q-full
