In [18]:
import csv
from sklearn.model_selection import train_test_split
from functools import reduce


def calculate_output_shape(cfg):
    # const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
    # const int output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
    height, width = cfg['image_shape']
    kernel_h, kernel_w = cfg['kernel_shape']
    pad_h, pad_w = cfg['pads']
    dilation_h, dilation_w = cfg['dilations']
    stride_h, stride_w = cfg['strides']
    output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) // stride_h + 1
    output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) // stride_w + 1
    output_shape = [cfg['batch_size'], cfg['output_channels'], output_h, output_w]
    return output_shape


def calculate_gmacs(item):
    def reduce_list_prod(x):
        return reduce(lambda a, b : a * b, x)
    cfg = {
        'image_shape' : [int(item['H']), int(item['W'])],
        'kernel_shape' : [int(item['kH']), int(item['kW'])],
        'pads' : [int(item['pad_h']), int(item['pad_w'])],
        'dilations' : [int(item['dilation_h']), int(item['dilation_w'])],
        'strides' : [int(item['stride_h']), int(item['stride_w'])],
        'batch_size' : int(item['N']),
        'output_channels' : int(item['kN']),
    }
    output_shape = calculate_output_shape(cfg)
    gmacs = reduce_list_prod(output_shape) * reduce_list_prod(cfg['kernel_shape']) * int(item['C'])
    return gmacs * 1e-9


def data_processer(file_name : str, split : bool = False):
    feature, energy, latency = [], [], []
    with open(file_name, 'r') as fp:
        f_csv = csv.DictReader(fp)
        data = list(f_csv)
    for item in data:
        repeat = int(item['repeat'])
        feature_vec = [
                        # item['N'], 
                        # item['C'], 
                        item['H'], 
                        item['W'], 
                        item['kN'], 
                        item['kC'], 
                        item['kH'], 
                        item['kW'], 
                        item['pad_h'], 
                        item['pad_w'], 
                        # item['dilation_h'], 
                        # item['dilation_w'], 
                        item['stride_h'], 
                        item['stride_w'], 
                        # item['group'], 
                        item['has_bias'],
                        calculate_gmacs(item),
                    ]
        feature_vec = [int(x) for x in feature_vec]
        feature.append(feature_vec)
        energy.append(float(item['energy']) / repeat)
        latency.append(float(item['latency']) / repeat)
    if split:
        feature_train, feature_test, energy_train, energy_test, latency_train, latency_test = train_test_split(feature, energy, latency)
        train_split = {
            'feature' : feature_train,
            'energy' : energy_train,
            'latency' : latency_train
        }
        test_split = {
            'feature' : feature_test,
            'energy' : energy_test,
            'latency' : latency_test
        }
        return train_split, test_split
    else:
        train_data = {
            'feature' : feature,
            'energy' : energy,
            'latency' : latency
        }
        return train_data
train_data, val_data = data_processer('onnx_conv_results_part.csv', True)
test_data = data_processer('onnx_conv_results_raw_new.csv', False)

print(len(test_data['feature']))

1144


In [15]:
import numpy as np
from sklearn.metrics import mean_squared_error


def get_accuracy(y_pred, y_true, threshold = 0.01):
    a = (y_true - y_pred) / y_true
    b = np.where(abs(a) <= threshold)
    return len(b[0]) / len(y_true)


def get_metrics(y_pred, y_true):
    rmspe = (np.sqrt(np.mean(np.square((y_true - y_pred) / y_true)))) * 100
    rmse = np.sqrt(mean_squared_error(y_pred, y_true))
    error = rmse / np.mean(y_true)
    acc5 = get_accuracy(y_pred, y_true, threshold=0.05)
    acc10 = get_accuracy(y_pred, y_true, threshold=0.10)
    acc15 = get_accuracy(y_pred, y_true, threshold=0.15)
    print(f"rmse: {rmse:.4f}; rmspe: {rmspe:.4f}; error: {error:.4f}; 5% accuracy: {acc5:.4f}; 10% accuracy: {acc10:.4f}; 15% accuracy: {acc15:.4f}.")


In [16]:
# RandomForestRegressor
from sklearn.ensemble import RandomForestRegressor
n_features = len(train_data['feature'][0])

for label in ["energy", "latency"]:
    model = RandomForestRegressor(
        max_depth=50,
        n_estimators=370,
        min_samples_leaf=1,
        min_samples_split=2,
        max_features=n_features,
        oob_score=True,
        random_state=10
    )
    model.fit(train_data["feature"], train_data[label])
    # val
    predicts = model.predict(val_data["feature"])
    print(f'{label} Results:')
    get_metrics(predicts, val_data[label])
    print("")
    # test
    predicts = model.predict(test_data["feature"])
    print('Testing set:')
    get_metrics(predicts, test_data[label])
    print("")
    


energy Results:
rmse: 0.9554; rmspe: 59.6581; error: 2.2549; 5% accuracy: 0.1721; 10% accuracy: 0.3289; 15% accuracy: 0.4685.

Testing set:
rmse: 0.2274; rmspe: inf; error: 7.3458; 5% accuracy: 0.1101; 10% accuracy: 0.2098; 15% accuracy: 0.3007.



  if sys.path[0] == '':
  


latency Results:
rmse: 0.0116; rmspe: 67.3600; error: 1.7016; 5% accuracy: 0.1931; 10% accuracy: 0.3528; 15% accuracy: 0.5029.

Testing set:
rmse: 0.0044; rmspe: 776.0877; error: 6.0964; 5% accuracy: 0.1486; 10% accuracy: 0.2990; 15% accuracy: 0.4318.

