In [5]:
# Configs

label_name = "math"
embedding_type = "perf" # time or perf

In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf

from tensorflow.keras import activations
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

np.set_printoptions(precision=3, suppress=True)

In [7]:
dataset = pd.read_csv(f"../dataset/{embedding_type}/{label_name}_dataset.csv")
dataset = pd.get_dummies(dataset)

train, test = train_test_split(dataset, test_size=0.33, random_state=42, shuffle=True)

train_dataset_features = train.copy().drop('label', axis=1)
train_dataset_labels = train.copy().pop('label')

test_dataset_features = test.copy().drop('label', axis=1)
test_dataset_labels = test.copy().pop('label')

dataset.head()

Unnamed: 0,branch-misses_FEATURE_CONFIG,branch-misses_INTERCEPT,branch-misses_R-VAL,branches_FEATURE_CONFIG,branches_INTERCEPT,branches_R-VAL,context-switches_FEATURE_CONFIG,context-switches_INTERCEPT,context-switches_R-VAL,cpu-migrations_FEATURE_CONFIG,...,stalled-cycles-frontend_FEATURE_TYPE_LOGLOG_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_LOG_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_POWER,task-clock_FEATURE_TYPE_FACTORIAL,task-clock_FEATURE_TYPE_FRACTIONAL_POWER,task-clock_FEATURE_TYPE_LOGLOG_POLYNOMIAL,task-clock_FEATURE_TYPE_LOG_POLYNOMIAL,task-clock_FEATURE_TYPE_POLYNOMIAL,task-clock_FEATURE_TYPE_POWER
0,0.0,12176.27864,5.274472,1.0,354489.841881,31.023435,0,0.0,0.0,0,...,0,1,0,0,0,0,0,0,1,0
1,0.8,12300.866448,140.866882,0.1,360567.282593,278.416553,0,0.0,0.0,0,...,0,0,0,0,0,0,0,1,0,0
2,0.0,12396.648839,38.928139,1.0,358483.822954,81.019933,0,0.0,0.0,0,...,0,0,1,0,0,0,0,0,1,0
3,4.0,12358.582719,-1.1e-05,1.0,355754.502857,256.760672,0,0.0,0.0,0,...,0,0,0,1,0,1,0,0,0,0
4,1.0,12313.598463,3.715744,1.0,354852.459189,327.2899,0,0.0,0.0,0,...,0,1,0,0,0,1,0,0,0,0


In [8]:
train_dataset_features.sort_index()

Unnamed: 0,branch-misses_FEATURE_CONFIG,branch-misses_INTERCEPT,branch-misses_R-VAL,branches_FEATURE_CONFIG,branches_INTERCEPT,branches_R-VAL,context-switches_FEATURE_CONFIG,context-switches_INTERCEPT,context-switches_R-VAL,cpu-migrations_FEATURE_CONFIG,...,stalled-cycles-frontend_FEATURE_TYPE_LOGLOG_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_LOG_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_POWER,task-clock_FEATURE_TYPE_FACTORIAL,task-clock_FEATURE_TYPE_FRACTIONAL_POWER,task-clock_FEATURE_TYPE_LOGLOG_POLYNOMIAL,task-clock_FEATURE_TYPE_LOG_POLYNOMIAL,task-clock_FEATURE_TYPE_POLYNOMIAL,task-clock_FEATURE_TYPE_POWER
0,0.0,12176.278640,5.274472e+00,1.0,354489.841881,31.023435,0,0.0,0.0,0,...,0,1,0,0,0,0,0,0,1,0
1,0.8,12300.866448,1.408669e+02,0.1,360567.282593,278.416553,0,0.0,0.0,0,...,0,0,0,0,0,0,0,1,0,0
2,0.0,12396.648839,3.892814e+01,1.0,358483.822954,81.019933,0,0.0,0.0,0,...,0,0,1,0,0,0,0,0,1,0
3,4.0,12358.582719,-1.096654e-05,1.0,355754.502857,256.760672,0,0.0,0.0,0,...,0,0,0,1,0,1,0,0,0,0
4,1.0,12313.598463,3.715744e+00,1.0,354852.459189,327.289900,0,0.0,0.0,0,...,0,1,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5938,0.0,12467.420365,2.007505e+01,1.0,358499.796265,102.392718,0,0.0,0.0,0,...,0,1,0,0,0,0,0,1,0,0
5940,2.0,12278.039263,2.621763e-14,1.0,355209.559184,256.793758,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,1,0
5942,1.0,12311.611866,3.702682e+00,1.0,354513.610929,884.972592,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,1,0
5947,1.0,12310.832756,4.590031e+00,1.0,354976.541826,664.581813,0,0.0,0.0,0,...,0,1,0,0,0,0,0,0,1,0


In [9]:
test_dataset_features.sort_index()

Unnamed: 0,branch-misses_FEATURE_CONFIG,branch-misses_INTERCEPT,branch-misses_R-VAL,branches_FEATURE_CONFIG,branches_INTERCEPT,branches_R-VAL,context-switches_FEATURE_CONFIG,context-switches_INTERCEPT,context-switches_R-VAL,cpu-migrations_FEATURE_CONFIG,...,stalled-cycles-frontend_FEATURE_TYPE_LOGLOG_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_LOG_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_POWER,task-clock_FEATURE_TYPE_FACTORIAL,task-clock_FEATURE_TYPE_FRACTIONAL_POWER,task-clock_FEATURE_TYPE_LOGLOG_POLYNOMIAL,task-clock_FEATURE_TYPE_LOG_POLYNOMIAL,task-clock_FEATURE_TYPE_POLYNOMIAL,task-clock_FEATURE_TYPE_POWER
8,2.0,12378.326531,-209.163265,0.0,356091.016439,1.403287e+01,0,0.0,0.0,0,...,0,1,0,0,0,0,0,1,0,0
12,1.0,12213.758169,3.625786,1.0,353659.222153,3.252067e+02,0,0.0,0.0,0,...,0,0,0,0,0,1,0,0,0,0
14,0.0,12116.890361,60.779827,1.0,356370.899267,7.348262e+01,0,0.0,0.0,0,...,0,0,0,0,0,1,0,0,0,0
15,1.0,12371.937143,2.987563,1.0,356527.551020,6.530074e+01,0,0.0,0.0,0,...,0,1,0,0,0,0,0,0,0,1
17,0.0,12247.457746,52.703847,1.0,356387.498915,1.423683e+02,0,0.0,0.0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5941,1.0,12314.822883,1.704246,1.0,354975.643034,6.615757e+02,0,0.0,0.0,0,...,0,1,0,0,0,0,0,0,1,0
5943,2.0,12300.387768,-0.000003,4.0,359984.733725,-2.879984e-13,0,0.0,0.0,0,...,0,0,1,0,0,0,0,1,0,0
5944,0.0,12233.320504,75.226898,1.0,359885.417516,2.033561e+03,0,0.0,0.0,0,...,0,0,0,0,0,0,0,1,0,0
5945,0.9,12262.235739,89.969619,2.0,355167.687085,1.331121e-03,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,1,0


In [28]:
normalize = preprocessing.Normalization()
normalize.adapt(train_dataset_features)

model = tf.keras.Sequential([
  normalize,
  layers.Dense(512, activation=activations.relu),
  layers.Dense(512, activation=activations.relu),
  layers.Dense(1024, activation=activations.relu),
  layers.Dense(1024, activation=activations.relu),
  layers.Dense(512, activation=activations.relu),
  layers.Dense(512, activation=activations.relu),
  layers.Dense(256, activation=activations.relu),
  layers.Dense(256, activation=activations.relu),
  layers.Dense(128, activation=activations.relu),
  layers.Dense(128, activation=activations.relu),
  layers.Dense(32, activation=activations.relu),
  layers.Dense(32, activation=activations.relu),
  layers.Dense(8, activation=activations.relu),
  layers.Dense(1, activation='sigmoid')
])

model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
              optimizer = tf.optimizers.Adam(),
              metrics=['accuracy'])

In [29]:
model.fit(train_dataset_features, train_dataset_labels, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x13d488370>

In [30]:
predicted_train_dataset_features=(model.predict(train_dataset_features) > 0.5).astype("int32")

tf.math.confusion_matrix(train_dataset_labels, predicted_train_dataset_features)

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[3226,   48],
       [  81,  630]], dtype=int32)>

In [31]:
accuracy = accuracy_score(train_dataset_labels.to_numpy(), predicted_train_dataset_features)
print('Accuracy: %f' % accuracy)
precision = precision_score(train_dataset_labels.to_numpy(), predicted_train_dataset_features)
print('Precision: %f' % precision)
recall = recall_score(train_dataset_labels.to_numpy(), predicted_train_dataset_features)
print('Recall: %f' % recall)
f1 = f1_score(train_dataset_labels.to_numpy(), predicted_train_dataset_features)
print('F1 score: %f' % f1)

Accuracy: 0.967629
Precision: 0.929204
Recall: 0.886076
F1 score: 0.907127


In [32]:
predicted_test_dataset_features=(model.predict(test_dataset_features) > 0.5).astype("int32")

tf.math.confusion_matrix(test_dataset_labels, predicted_test_dataset_features)

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[1530,  133],
       [ 148,  153]], dtype=int32)>

In [33]:
accuracy = accuracy_score(test_dataset_labels.to_numpy(), predicted_test_dataset_features)
print('Accuracy: %f' % accuracy)
precision = precision_score(test_dataset_labels.to_numpy(), predicted_test_dataset_features)
print('Precision: %f' % precision)
recall = recall_score(test_dataset_labels.to_numpy(), predicted_test_dataset_features)
print('Recall: %f' % recall)
f1 = f1_score(test_dataset_labels.to_numpy(), predicted_test_dataset_features)
print('F1 score: %f' % f1)

Accuracy: 0.856925
Precision: 0.534965
Recall: 0.508306
F1 score: 0.521295
