### Setup plot environment  

In [102]:
# plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# seaborn
import seaborn as sns
sns.set_style('darkgrid')

# matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

### Load data  

In [103]:
import csv

data_filename = './data/onnx_conv_results_raw.csv'
# data_filename = './data/onnx_conv_results_resampled.csv'
with open(data_filename) as f:
    f_csv = csv.DictReader(f)
    fieldnames = f_csv.fieldnames
    data_raw = list(f_csv)
print('total items:', len(data_raw))
print('fieldnames:\n', fieldnames)

total items: 1144
fieldnames:
 ['model', 'repeat', 'latency', 'energy', 'N', 'C', 'H', 'W', 'kN', 'kC', 'kH', 'kW', 'pad_h', 'pad_w', 'dilation_h', 'dilation_w', 'stride_h', 'stride_w', 'group', 'has_bias']


### Process data

##### Util functions

In [104]:
from functools import reduce

def calculate_output_shape(cfg):
    # const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
    # const int output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
    height, width = cfg['image_shape']
    kernel_h, kernel_w = cfg['kernel_shape']
    pad_h, pad_w = cfg['pads']
    dilation_h, dilation_w = cfg['dilations']
    stride_h, stride_w = cfg['strides']
    output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) // stride_h + 1
    output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) // stride_w + 1
    output_shape = [cfg['batch_size'], cfg['output_channels'], output_h, output_w]
    return output_shape

def calculate_gmacs(item):
    def reduce_list_prod(x):
        return reduce(lambda a, b : a * b, x)
    cfg = {
        'image_shape' : [int(item['H']), int(item['W'])],
        'kernel_shape' : [int(item['kH']), int(item['kW'])],
        'pads' : [int(item['pad_h']), int(item['pad_w'])],
        'dilations' : [int(item['dilation_h']), int(item['dilation_w'])],
        'strides' : [int(item['stride_h']), int(item['stride_w'])],
        'batch_size' : int(item['N']),
        'output_channels' : int(item['kN']),
    }
    output_shape = calculate_output_shape(cfg)
    gmacs = reduce_list_prod(output_shape) * reduce_list_prod(cfg['kernel_shape']) * int(item['C'])
    return gmacs * 1e-9

##### Calculate

In [105]:
import numpy as np

energies = [float(item['energy']) / int(item['repeat']) for item in data_raw]
latencies = [float(item['latency']) / int(item['repeat']) for item in data_raw]
gmacs = [calculate_gmacs(item) for item in data_raw]

### Plot figures

##### data distribution

In [106]:
def show_distribution(data, name : str):
    print('distribution info for:', name)
    print('min:', min(data))
    print('max:', max(data))
    print('median:', np.median(data))
    multiple_of_median = 10
    upper_limit = np.median(data) * multiple_of_median
    used_points = [x for x in data if x <= upper_limit]
    print('using multiple {} of median as upper limit:'.format(multiple_of_median), upper_limit)
    print('number of points within range:', len(used_points))
    print('ratio of points within range:', len(used_points) / len(data))
    fig = px.histogram({name : used_points}, x = name, nbins = 100, title = 'data distribution of ' + name)
    fig.show()

show_distribution(energies, 'energy')
show_distribution(latencies, 'latency')
show_distribution(gmacs, 'gmacs')

distribution info for: energy
min: 0.0
max: 0.31974952300000004
median: 0.0198936235
using multiple 10 of median as upper limit: 0.198936235
number of points within range: 1134
ratio of points within range: 0.9912587412587412


distribution info for: latency
min: 7.227e-05
max: 0.006605013
median: 0.000507265
using multiple 10 of median as upper limit: 0.00507265
number of points within range: 1139
ratio of points within range: 0.9956293706293706


distribution info for: gmacs
min: 6.4e-08
max: 8.851046400000001
median: 0.021751296
using multiple 10 of median as upper limit: 0.21751296
number of points within range: 941
ratio of points within range: 0.8225524475524476


##### Energy vs Latency

In [107]:
data_energy_latency = {
    'energy' : energies,
    'latency' : [x * 1e3 for x in latencies],
    'gmacs' : gmacs,
}
labels_energy_latency = {
    'energy' : 'Energy (Joule)', 
    'latency' : 'Latency (ms)', 
    'gmacs' : 'GMACS', 
}
fig = px.scatter(data_energy_latency, x = 'latency', y = 'energy', color = 'gmacs', size = 'gmacs', 
                 title = 'Energy vs Latency (batch size = 1)', 
                 log_x = True, log_y = True, 
                 labels = labels_energy_latency)
fig.show()

##### Energy/Latency vs GMACS

In [108]:
keys = ['energy', 'latency']
fig = make_subplots(1, 2, subplot_titles = ['Energy vs GMACS', 'Latency vs GMACS'])
for i, k in enumerate(keys):
    fig.add_trace(
        go.Scatter(x = data_energy_latency['gmacs'], y = data_energy_latency[k], mode = 'markers'),
        row = 1, col = i + 1
    )
    fig.update_xaxes(title_text = labels_energy_latency['gmacs'], row = 1, col = i + 1, type = 'log')
    fig.update_yaxes(title_text = labels_energy_latency[k], row = 1, col = i + 1, type = 'log')
fig.update_layout(coloraxis = dict(colorscale = 'Bluered_r'), showlegend = False)
fig.show()

### Build predictor  

##### Util functions

In [116]:
import numpy as np
from sklearn.metrics import mean_squared_error

def get_accuracy(y_pred, y_true, threshold = 0.01):
    a = (y_true - y_pred) / y_true
    b = np.where(abs(a) <= threshold)
    return len(b[0]) / len(y_true)

def get_metrics(y_pred, y_true):
    rmspe = (np.sqrt(np.mean(np.square((y_true - y_pred) / y_true)))) * 100
    rmse = np.sqrt(mean_squared_error(y_pred, y_true))
    error = rmse / np.mean(y_true)
    acc5 = get_accuracy(y_pred, y_true, threshold=0.05)
    acc10 = get_accuracy(y_pred, y_true, threshold=0.10)
    acc15 = get_accuracy(y_pred, y_true, threshold=0.15)
    print(f"rmse: {rmse:.4f}; rmspe: {rmspe:.4f}; error: {error:.4f}; 5% accuracy: {acc5:.4f}; 10% accuracy: {acc10:.4f}; 15% accuracy: {acc15:.4f}.")

def get_positive_indices(y_pred, y_true, threshold = 0.01):
    a = (y_true - y_pred) / y_true
    b = np.where(abs(a) <= threshold)
    return b[0]

##### Predictor

In [117]:
from sklearn.model_selection import train_test_split

feature_vectors = []
for item in data_raw:
    vec = [
            # item['N'], 
            # item['C'], 
            item['H'], 
            item['W'], 
            item['kN'], 
            item['kC'], 
            item['kH'], 
            item['kW'], 
            item['pad_h'], 
            item['pad_w'], 
            # item['dilation_h'], 
            # item['dilation_w'], 
            item['stride_h'], 
            item['stride_w'], 
            # item['group'], 
            item['has_bias'],
            calculate_gmacs(item),
        ]
    vec = [int(x) for x in vec]
    feature_vectors.append(vec)
feature_train, feature_test, energy_train, energy_test, latency_train, latency_test = train_test_split(feature_vectors, energies, latencies)
train_data = {
    'feature' : feature_train, 
    'energy' : energy_train, 
    'latency' : latency_train, 
}
val_data = {
    'feature' : feature_test, 
    'energy' : energy_test, 
    'latency' : latency_test, 
}
total_data = {
    'feature' : feature_vectors, 
    'energy' : energies, 
    'latency' : latencies, 
}

# RandomForestRegressor
from sklearn.ensemble import RandomForestRegressor
n_features = len(train_data['feature'][0])

positive_indices = {}
for label in ['energy', 'latency']:
    model = RandomForestRegressor(
        max_depth=50,
        n_estimators=370,
        min_samples_leaf=1,
        min_samples_split=2,
        max_features=n_features,
        oob_score=True,
        random_state=10
    )
    model.fit(train_data['feature'], train_data[label])
    predicts = model.predict(val_data['feature'])
    print(f'{label} Results:')
    get_metrics(predicts, val_data[label])
    positive_indices[label] = get_positive_indices(model.predict(total_data['feature']), total_data[label])
    print('')





energy Results:
rmse: 0.0085; rmspe: inf; error: 0.3200; 5% accuracy: 0.1818; 10% accuracy: 0.3916; 15% accuracy: 0.5350.




divide by zero encountered in true_divide


divide by zero encountered in true_divide


divide by zero encountered in true_divide



latency Results:
rmse: 0.0002; rmspe: 20.9895; error: 0.2640; 5% accuracy: 0.2238; 10% accuracy: 0.4580; 15% accuracy: 0.6399.

