In [1]:
from fynesse import access, assess, address

from functools import partial

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


In [2]:
models = ['resnet50', 'retinanet', 'unet', 'vitb16']

# this is a total of 15 events
events = ['instructions',
'cache-misses',
'cache-references',
'L1-dcache-load-misses',
'L1-dcache-loads',
'L1-dcache-stores',
'LLC-load-misses',
'LLC-loads',
'LLC-store-misses',
'LLC-stores',
'fp_arith_inst_retired.128b_packed_single',
'fp_arith_inst_retired.256b_packed_single',
'fp_arith_inst_retired.512b_packed_single',
'fp_arith_inst_retired.scalar_double',
'fp_arith_inst_retired.scalar_single']

seeds = [0, 42, 137]

## Feature Construction

In [60]:
seed = 0
master_dict = assess.eat_pickle(f'./data/pickle/s{seed}.pickle')

In [61]:
# choose your desired event combo

featured_events = ['cache-misses', 'fp_arith_inst_retired.256b_packed_single']
X = address.make_features(master_dict, featured_events, n_bins=5, models=models)

In [62]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(X)

X = scaler.transform(X)

In [63]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder().fit(models)

y = list()

for model in models:
    y += [model] * 100

y = le.transform(y)

In [64]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

## k-NN

In [65]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()
knn.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [66]:
from sklearn.model_selection import GridSearchCV

params = {'n_neighbors': [1, 3, 5, 10],
          'weights': ['uniform', 'distance'],
          'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
          'p': [1, 2 ,3]}

clf = GridSearchCV(knn, params, cv=5)
clf.fit(X_train, y_train)

clf.cv_results_

{'mean_fit_time': array([0.00042682, 0.00029535, 0.00028496, 0.00025706, 0.00027728,
        0.00023637, 0.00021586, 0.0001996 , 0.00021124, 0.00019946,
        0.00021477, 0.00020404, 0.00021377, 0.00020885, 0.00020833,
        0.00020204, 0.00020576, 0.00021052, 0.00020404, 0.00021143,
        0.00020442, 0.00019827, 0.00022502, 0.00020895, 0.00021267,
        0.00020065, 0.00020881, 0.00019884, 0.00027776, 0.00029101,
        0.00023828, 0.00020938, 0.00020804, 0.00022216, 0.00029669,
        0.00029268, 0.00020518, 0.00020199, 0.00020938, 0.00019875,
        0.00029526, 0.00027528, 0.00021558, 0.00020099, 0.00021181,
        0.0002048 , 0.00028596, 0.00027776, 0.000209  , 0.00019884,
        0.0001986 , 0.00020289, 0.00020361, 0.0002028 , 0.00021558,
        0.00019941, 0.00020614, 0.00019655, 0.00021186, 0.00021009,
        0.00020561, 0.00020003, 0.0002028 , 0.00020046, 0.00021257,
        0.00021238, 0.00020785, 0.00020204, 0.00020843, 0.00019751,
        0.0002212 , 0.00020213,

In [67]:
knn = KNeighborsClassifier(n_neighbors=3)

knn.fit(X_train, y_train)

In [68]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

accuracy = accuracy_score(y_test, knn.predict(X_test))
f1 = f1_score(y_test, knn.predict(X_test), average='macro')
precision = precision_score(y_test, knn.predict(X_test), average='macro')
recall = recall_score(y_test, knn.predict(X_test), average='macro')

print(f'Accuracy: {accuracy}')
print(f'F1: {f1}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')

Accuracy: 1.0
F1: 1.0
Precision: 1.0
Recall: 1.0


## DT

In [69]:
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(random_state=42)
dt.get_params()

{'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'monotonic_cst': None,
 'random_state': 42,
 'splitter': 'best'}

In [70]:
dt.fit(X_train, y_train)

accuracy = accuracy_score(y_test, dt.predict(X_test))
f1 = f1_score(y_test, dt.predict(X_test), average='macro')
precision = precision_score(y_test, dt.predict(X_test), average='macro')
recall = recall_score(y_test, dt.predict(X_test), average='macro')

print(f'Accuracy: {accuracy}')
print(f'F1: {f1}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')

Accuracy: 1.0
F1: 1.0
Precision: 1.0
Recall: 1.0


## RF

In [71]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(random_state=42)
rf.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'monotonic_cst': None,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 42,
 'verbose': 0,
 'warm_start': False}

In [72]:
params = {'n_estimators': [i+1 for i in range(10)],
          'random_state': [42]}

clf = GridSearchCV(rf, params, cv=5)
clf.fit(X_train, y_train)

clf.cv_results_

{'mean_fit_time': array([0.00149946, 0.00173159, 0.00224323, 0.0024765 , 0.00272999,
        0.00292697, 0.00343876, 0.00377569, 0.00416913, 0.0045486 ]),
 'std_fit_time': array([4.15713499e-04, 2.00395625e-04, 1.77606857e-04, 1.07638360e-04,
        1.62851067e-04, 5.23472084e-05, 1.43972876e-04, 1.23098779e-04,
        7.57205696e-05, 1.34623080e-04]),
 'mean_score_time': array([0.00042024, 0.00034299, 0.0004128 , 0.00035634, 0.00035195,
        0.000317  , 0.00035882, 0.0003653 , 0.00036278, 0.0003819 ]),
 'std_score_time': array([9.46798092e-05, 2.44448698e-05, 3.66056305e-05, 3.64131912e-05,
        4.45270937e-05, 2.04332373e-05, 3.42381034e-05, 3.41301505e-05,
        2.06413768e-05, 3.19300786e-05]),
 'param_n_estimators': masked_array(data=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',
             dtype=object),
 'param_random_state': masked_array(data=[42,

In [73]:
rf = RandomForestClassifier(n_estimators=3, random_state=42)

rf.fit(X_train, y_train)

accuracy = accuracy_score(y_test, rf.predict(X_test))
f1 = f1_score(y_test, rf.predict(X_test), average='macro')
precision = precision_score(y_test, rf.predict(X_test), average='macro')
recall = recall_score(y_test, rf.predict(X_test), average='macro')

print(f'Accuracy: {accuracy}')
print(f'F1: {f1}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')

Accuracy: 0.9875
F1: 0.9874921826141339
Precision: 0.9880952380952381
Recall: 0.9875


## MLP

In [74]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(random_state=42)

In [75]:
clf.cv_results_['rank_test_score']

array([ 1, 10,  7,  7,  7,  2,  2,  2,  2,  2], dtype=int32)

In [76]:
clf.cv_results_['params'][-9]

{'n_estimators': 2, 'random_state': 42}

In [77]:
params = {'hidden_layer_sizes': [[i+1] for i in range(10)],
          'max_iter': [5000],
          'random_state': [42]}

clf = GridSearchCV(mlp, params, cv=5)
clf.fit(X_train, y_train)

clf.cv_results_

{'mean_fit_time': array([0.10494051, 0.08627176, 0.20800519, 0.4381597 , 0.39138956,
        0.39126759, 0.41783276, 0.45053535, 0.50908656, 0.34692359]),
 'std_fit_time': array([0.0063505 , 0.00297162, 0.00148104, 0.00983985, 0.01207056,
        0.0099521 , 0.00361535, 0.00478046, 0.00560979, 0.01233056]),
 'mean_score_time': array([0.00029607, 0.0002902 , 0.00029578, 0.00030174, 0.00030537,
        0.00031161, 0.00030169, 0.00029187, 0.00029845, 0.00029263]),
 'std_score_time': array([1.99414873e-05, 2.14101477e-05, 9.17908433e-06, 7.96417984e-06,
        1.20068316e-05, 2.17188726e-05, 2.26986508e-06, 2.56075317e-06,
        1.03621882e-05, 5.86645374e-06]),
 'param_hidden_layer_sizes': masked_array(data=[list([1]), list([2]), list([3]), list([4]), list([5]),
                    list([6]), list([7]), list([8]), list([9]), list([10])],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',
             dt

In [78]:
mlp = MLPClassifier(hidden_layer_sizes=[5], max_iter=5000, random_state=42)

mlp.fit(X_train, y_train)

accuracy = accuracy_score(y_test, mlp.predict(X_test))
f1 = f1_score(y_test, mlp.predict(X_test), average='macro')
precision = precision_score(y_test, mlp.predict(X_test), average='macro')
recall = recall_score(y_test, mlp.predict(X_test), average='macro')

print(f'Accuracy: {accuracy}')
print(f'F1: {f1}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')

Accuracy: 1.0
F1: 1.0
Precision: 1.0
Recall: 1.0
