In [122]:
from fynesse import access, assess, address

from functools import partial

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


In [123]:
from config import MODELS, EVENTS, SEEDS

## Feature Construction

In [124]:
seed = 0
master_dict = assess.eat_pickle(f'./data/pickle/s{seed}.pickle')

In [125]:
# choose your desired event combo

featured_events = ['cache-misses', 'fp_arith_inst_retired.128b_packed_single']
X = address.make_features(master_dict, featured_events, n_bins=32, models=MODELS)

In [126]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler().fit(X)
X = scaler.transform(X)

In [127]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder().fit(MODELS)

y = list()

for model in MODELS:
    y += [model] * 100

y = le.transform(y)

In [128]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

## k-NN

In [129]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()
knn.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [130]:
from sklearn.model_selection import GridSearchCV

params = {'n_neighbors': [1, 3, 5, 10]}

clf = GridSearchCV(knn, params, cv=5)
clf.fit(X_train, y_train)

clf.cv_results_['mean_test_score']

array([0.99375 , 0.99375 , 0.996875, 0.99375 ])

In [131]:
clf.cv_results_['rank_test_score']

array([2, 2, 1, 2], dtype=int32)

In [132]:
knn = KNeighborsClassifier(n_neighbors=1)

knn.fit(X_train, y_train)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

accuracy = accuracy_score(y_test, knn.predict(X_test))
f1 = f1_score(y_test, knn.predict(X_test), average='macro')
precision = precision_score(y_test, knn.predict(X_test), average='macro')
recall = recall_score(y_test, knn.predict(X_test), average='macro')

print(f'Accuracy: {accuracy}')
print(f'F1: {f1}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')

Accuracy: 0.9875
F1: 0.9874921826141339
Precision: 0.9880952380952381
Recall: 0.9875


## DT

In [133]:
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(random_state=42)
dt.get_params()

{'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'monotonic_cst': None,
 'random_state': 42,
 'splitter': 'best'}

In [134]:
params = {'criterion': ['gini', 'entropy', 'log_loss'],
          'splitter': ['best', 'random']}

clf = GridSearchCV(dt, params, cv=5)
clf.fit(X_train, y_train)

clf.cv_results_['mean_test_score']

array([0.99375 , 0.984375, 0.990625, 0.996875, 0.990625, 0.996875])

In [135]:
clf.cv_results_['rank_test_score']

array([3, 6, 4, 1, 4, 1], dtype=int32)

In [136]:
clf.cv_results_['params']

[{'criterion': 'gini', 'splitter': 'best'},
 {'criterion': 'gini', 'splitter': 'random'},
 {'criterion': 'entropy', 'splitter': 'best'},
 {'criterion': 'entropy', 'splitter': 'random'},
 {'criterion': 'log_loss', 'splitter': 'best'},
 {'criterion': 'log_loss', 'splitter': 'random'}]

In [137]:
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

accuracy = accuracy_score(y_test, dt.predict(X_test))
f1 = f1_score(y_test, dt.predict(X_test), average='macro')
precision = precision_score(y_test, dt.predict(X_test), average='macro')
recall = recall_score(y_test, dt.predict(X_test), average='macro')

print(f'Accuracy: {accuracy}')
print(f'F1: {f1}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')

Accuracy: 0.95
F1: 0.95
Precision: 0.95
Recall: 0.95


## RF

In [138]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(random_state=42)
rf.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'monotonic_cst': None,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 42,
 'verbose': 0,
 'warm_start': False}

In [139]:
params = {'n_estimators': [1, 5, 10, 20, 50, 100],
          'random_state': [42]}

clf = GridSearchCV(rf, params, cv=5)
clf.fit(X_train, y_train)

clf.cv_results_['mean_test_score']

array([0.959375, 0.990625, 0.996875, 0.996875, 0.996875, 0.996875])

In [140]:
clf.cv_results_['rank_test_score']

array([6, 5, 1, 1, 1, 1], dtype=int32)

In [141]:
rf = RandomForestClassifier(n_estimators=10, random_state=42)

rf.fit(X_train, y_train)

accuracy = accuracy_score(y_test, rf.predict(X_test))
f1 = f1_score(y_test, rf.predict(X_test), average='macro')
precision = precision_score(y_test, rf.predict(X_test), average='macro')
recall = recall_score(y_test, rf.predict(X_test), average='macro')

print(f'Accuracy: {accuracy}')
print(f'F1: {f1}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')

Accuracy: 0.975
F1: 0.975
Precision: 0.975
Recall: 0.975


## MLP

In [142]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(random_state=42)

In [143]:
params = {'hidden_layer_sizes': [[i+1] for i in range(10)],
          'activation': ['identity', 'logistic', 'tanh', 'relu'],
          'solver': ['sgd', 'adam'],
          'learning_rate_init': [0.001, 0.005, 0.01],
          'max_iter': [500, 1000, 2500, 5000],
          'momentum': [0.9, 0.99],
          'random_state': [42]}

clf = GridSearchCV(mlp, params, cv=5)
clf.fit(X_train, y_train)

clf.cv_results_['mean_test_score']



array([0.5     , 0.28125 , 0.978125, ..., 0.984375, 0.996875, 0.984375])

In [147]:
clf.cv_results_

{'mean_fit_time': array([0.21633472, 0.19759583, 0.24070191, ..., 0.10473676, 0.06382189,
        0.10120106]),
 'std_fit_time': array([0.08524167, 0.01389521, 0.07633053, ..., 0.01287386, 0.0013464 ,
        0.01056575]),
 'mean_score_time': array([0.00087509, 0.00031257, 0.0008389 , ..., 0.0002614 , 0.00029159,
        0.00028949]),
 'std_score_time': array([5.59820381e-04, 2.26971482e-05, 5.61982230e-04, ...,
        4.72631023e-05, 4.92452984e-05, 3.94420720e-05]),
 'param_activation': masked_array(data=['identity', 'identity', 'identity', ..., 'relu',
                    'relu', 'relu'],
              mask=[False, False, False, ..., False, False, False],
        fill_value='?',
             dtype=object),
 'param_hidden_layer_sizes': masked_array(data=[list([1]), list([1]), list([1]), ..., list([10]),
                    list([10]), list([10])],
              mask=[False, False, False, ..., False, False, False],
        fill_value='?',
             dtype=object),
 'param_learning_

In [155]:
np.where(clf.cv_results_['rank_test_score'] == 1, clf.cv_results_['params'], 0)

array([0, 0, 0, ..., 0,
       {'activation': 'relu', 'hidden_layer_sizes': [10], 'learning_rate_init': 0.01, 'max_iter': 5000, 'momentum': 0.99, 'random_state': 42, 'solver': 'sgd'},
       0], dtype=object)

In [146]:
mlp = MLPClassifier(hidden_layer_sizes=[5], max_iter=5000, random_state=42)

mlp.fit(X_train, y_train)

accuracy = accuracy_score(y_test, mlp.predict(X_test))
f1 = f1_score(y_test, mlp.predict(X_test), average='macro')
precision = precision_score(y_test, mlp.predict(X_test), average='macro')
recall = recall_score(y_test, mlp.predict(X_test), average='macro')

print(f'Accuracy: {accuracy}')
print(f'F1: {f1}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')

Accuracy: 0.975
F1: 0.9749373433583959
Precision: 0.9772727272727273
Recall: 0.975
