# Data Cleaning

This notebook reads the individual evaluation results and merges them into single dataframes.
Additionally, it creates Excel tables which can be used for manual exploration.

The results are already in the folders `results` and `results_layers`.


In [8]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None 
import matplotlib.pyplot as plt
import matplotlib
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual, Layout
import glob

import seaborn as sns
sns.set(style="ticks", color_codes=True)

# Print the versions of the core libraries imported above.
print("Numpy Version:\t\t", np.__version__)
print("Pandas Version:\t\t", pd.__version__)
print("Matplotlib Version:\t", matplotlib.__version__)

Numpy Version:		 1.19.0
Pandas Version:		 1.0.5
Matplotlib Version:	 3.2.2


## Combined table for single model

This table contains all the data collected for this model -- across all the MCUs

### Benchmark + Verification

#### LeNet-MNIST

In [3]:
# Read the LeNet-MNIST benchmarking + verification results of the three
# boards (L4, F4, F7) and concatenate them into a single dataframe.
l4_df, f4_df, f7_df = map(pd.read_pickle, (
    'results/L4/NUCLEO_L496ZG_LeNet-MNIST_benchmarking+verification_results_2020-07-24.pkl',
    'results/F4/DISCO_F469NI_LeNet-MNIST_benchmarking+verification_results_2020-07-18.pkl',
    'results/F7/NUCLEO_F767ZI_LeNet-MNIST_benchmarking+verification_results_2020-07-22.pkl',
))

df = pd.concat([l4_df, f4_df, f7_df])

Write the merged dataframe to a pickle file and to an Excel file; the Excel file can be used for manual data exploration.

In [4]:
# Save the current `df` under the LeNet-MNIST name, once as a pickle and
# once as an Excel workbook.
df.to_pickle('results/LeNet-MNIST_benchmarking+verification_results.pkl')
df.to_excel('results/LeNet-MNIST_benchmarking+verification_results.xlsx')

#### ResNet20-CIFAR-10

In [6]:
# Read the ResNet20 CIFAR-10 benchmarking results of the three boards
# (L4, F4, F7) and concatenate them into a single dataframe.
l4_df, f4_df, f7_df = map(pd.read_pickle, (
    'results/L4/NUCLEO_L496ZG_01d_ResNet20_CIFAR-10_benchmarking_results_2020-07-20.pkl',
    'results/F4/DISCO_F469NI_01d_ResNet20_CIFAR-10_benchmarking_results_2020-07-19.pkl',
    'results/F7/NUCLEO_F767ZI_01d_ResNet20_CIFAR-10_benchmarking_results_2020-07-22.pkl',
))

df = pd.concat([l4_df, f4_df, f7_df])

Write the merged dataframe to a pickle file and to an Excel file; the Excel file can be used for manual data exploration.

In [7]:
# Save the current `df` under the ResNet20 CIFAR-10 name, once as a pickle
# and once as an Excel workbook.
# NOTE(review): the output name says "benchmarking+verification" while the
# input files read for this model are named "..._benchmarking_results_..." —
# confirm the output name is intended.
df.to_pickle('results/01d_ResNet20_CIFAR-10_benchmarking+verification_results.pkl')
df.to_excel('results/01d_ResNet20_CIFAR-10_benchmarking+verification_results.xlsx')

#### Both models

In [2]:
# Stage 1: LeNet-MNIST results of the three boards (L4, F4, F7).
l4_df, f4_df, f7_df = map(pd.read_pickle, (
    'results/L4/NUCLEO_L496ZG_LeNet-MNIST_benchmarking+verification_results_2020-07-24.pkl',
    'results/F4/DISCO_F469NI_LeNet-MNIST_benchmarking+verification_results_2020-07-18.pkl',
    'results/F7/NUCLEO_F767ZI_LeNet-MNIST_benchmarking+verification_results_2020-07-22.pkl',
))

df = pd.concat([l4_df, f4_df, f7_df])

# Stage 2: ResNet20 CIFAR-10 results of the same boards, appended after the
# LeNet-MNIST rows. The per-board names are rebound to the ResNet20 frames.
l4_df, f4_df, f7_df = map(pd.read_pickle, (
    'results/L4/NUCLEO_L496ZG_01d_ResNet20_CIFAR-10_benchmarking_results_2020-07-20.pkl',
    'results/F4/DISCO_F469NI_01d_ResNet20_CIFAR-10_benchmarking_results_2020-07-19.pkl',
    'results/F7/NUCLEO_F767ZI_01d_ResNet20_CIFAR-10_benchmarking_results_2020-07-22.pkl',
))

df = pd.concat([df, l4_df, f4_df, f7_df])

Write the merged dataframe to a pickle file and to an Excel file; the Excel file can be used for manual data exploration.

In [3]:
# Save the current `df` as the aggregated benchmarking + verification table,
# once as a pickle and once as an Excel workbook.
df.to_pickle('results/A_aggregated_benchmarking+verification_results.pkl')
df.to_excel('results/A_aggregated_benchmarking+verification_results.xlsx')

### Layer Benchmark

Write to excel file which can be used for manual data acquisition

## Combined table for all layer measurements across all MCUs and models

This table contains all the layer measurements data -- across all the models and MCUs.

In [10]:
# Stage 1: LeNet-MNIST per-layer results of the three boards (L4, F4, F7).
l4_df, f4_df, f7_df = map(pd.read_pickle, (
    'results_layers/L4/NUCLEO_L496ZG_LeNet-MNIST_layer_results_2020-07-24.pkl',
    'results_layers/F4/DISCO_F469NI_LeNet-MNIST_layer_results_2020-07-22.pkl',
    'results_layers/F7/NUCLEO_F767ZI_LeNet-MNIST_layer_results_2020-07-23.pkl',
))

df = pd.concat([l4_df, f4_df, f7_df])

# Stage 2: ResNet20 CIFAR-10 per-layer results of the same boards, appended
# after the LeNet-MNIST rows. The per-board names are rebound to these frames.
l4_df, f4_df, f7_df = map(pd.read_pickle, (
    'results_layers/L4/NUCLEO_L496ZG_01d_ResNet20_CIFAR-10_layer_results_2020-07-24.pkl',
    'results_layers/F4/DISCO_F469NI_01d_ResNet20_CIFAR-10_layer_results_2020-07-22.pkl',
    'results_layers/F7/NUCLEO_F767ZI_01d_ResNet20_CIFAR-10_layer_results_2020-07-22.pkl',
))

df = pd.concat([df, l4_df, f4_df, f7_df])

Write the merged dataframe to a pickle file and to an Excel file; the Excel file can be used for manual data exploration.

In [11]:
# Save the current `df` as the aggregated layer-benchmark table, once as a
# pickle and once as an Excel workbook.
df.to_pickle('results_layers/A_aggregated_benchmarking-layers_results.pkl')
df.to_excel('results_layers/A_aggregated_benchmarking-layers_results.xlsx')

### Benchmark + Verification

Write to excel file which can be used for manual data acquisition

### Layer Benchmark

Write to excel file which can be used for manual data acquisition

### Add relative latency time for each layer to dataframes

In [43]:
# Load the L4 LeNet-MNIST layer results and note which MCU / model they are.
# NOTE(review): `df`, `mcu` and `model` are all reassigned by the next cell,
# and no cell visible in this notebook reads `mcu` or `model` — this looks
# like a leftover scratch cell; confirm before relying on it.
df = pd.read_pickle('results_layers/L4/NUCLEO_L496ZG_LeNet-MNIST_layer_results_2020-07-24.pkl')
mcu = 'L4'
model = 'LeNet-MNIST'

In [28]:
# Load the L4 ResNet20 CIFAR-10 layer results and note which MCU / model they are.
# NOTE(review): no later cell visible in this notebook reads `df`, `mcu` or
# `model` — this looks like a leftover scratch cell; confirm before relying on it.
df = pd.read_pickle('results_layers/L4/NUCLEO_L496ZG_01d_ResNet20_CIFAR-10_layer_results_2020-07-24.pkl')
mcu = 'L4'
model = '01d_ResNet20_CIFAR-10'

In [9]:
# Collect every pickle that sits exactly one sub-folder below `results_layers`
# and run `add_relative_latency` on each of them.
# NOTE(review): `add_relative_latency` is defined in the cell *below* this
# one, so on a fresh kernel run top-to-bottom this call raises NameError.
# Run the defining cell first, or move it above this cell.
filenames = glob.glob('results_layers/*/*.pkl')
print(filenames)
for filename in filenames:
    add_relative_latency(filename)

['results_layers/F4/DISCO_F469NI_LeNet-MNIST_layer_results_2020-07-22.pkl', 'results_layers/F4/DISCO_F469NI_01d_ResNet20_CIFAR-10_layer_results_2020-07-22.pkl', 'results_layers/L4/NUCLEO_L496ZG_01d_ResNet20_CIFAR-10_layer_results_2020-07-24.pkl', 'results_layers/L4/NUCLEO_L496ZG_LeNet-MNIST_layer_results_2020-07-24.pkl', 'results_layers/F7/NUCLEO_F767ZI_LeNet-MNIST_layer_results_2020-07-23.pkl', 'results_layers/F7/NUCLEO_F767ZI_01d_ResNet20_CIFAR-10_layer_results_2020-07-22.pkl']


In [6]:
def add_relative_latency(filename):
    """Add each layer's share of its measurement's total latency to a results pickle.

    The pickled DataFrame at ``filename`` is read, a ``layer_latency_relativ``
    column is added (or overwritten), and the frame is written back to the
    same path. Rows are grouped by their ``time`` value; within each group the
    new column holds ``layer_latency_mean`` divided by the group's summed
    ``layer_latency_mean``.

    Parameters
    ----------
    filename : str
        Path of a pickled DataFrame that has ``time`` and
        ``layer_latency_mean`` columns. The file is overwritten in place.

    Returns
    -------
    None
    """
    df = pd.read_pickle(filename)

    # One grouped sum replaces a full boolean-mask scan per distinct `time` value.
    latency_totals = df.groupby('time')['layer_latency_mean'].sum()
    # Look up every row's group total; rows with a missing `time` get NaN.
    df['layer_latency_relativ'] = df['layer_latency_mean'] / df['time'].map(latency_totals)

    df.to_pickle(filename)