In [1]:
# Configs

label_name = "math"
embedding_type = "perf" # time or perf

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

np.set_printoptions(precision=3, suppress=True)

In [3]:
dataset = pd.read_csv(f"../dataset/{embedding_type}/{label_name}_dataset.csv")
dataset = pd.get_dummies(dataset)

train, test = train_test_split(dataset, test_size=0.33, random_state=42, shuffle=True)

train_dataset_features = train.copy()
train_dataset_labels = train.copy().pop('label')

test_dataset_features = test.copy()
test_dataset_labels = test.copy().pop('label')

dataset.head()

Unnamed: 0,branch-misses_FEATURE_CONFIG,branch-misses_INTERCEPT,branch-misses_R-VAL,branches_FEATURE_CONFIG,branches_INTERCEPT,branches_R-VAL,context-switches_FEATURE_CONFIG,context-switches_INTERCEPT,context-switches_R-VAL,cpu-migrations_FEATURE_CONFIG,...,stalled-cycles-frontend_FEATURE_TYPE_LOGLOG_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_LOG_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_POWER,task-clock_FEATURE_TYPE_FACTORIAL,task-clock_FEATURE_TYPE_FRACTIONAL_POWER,task-clock_FEATURE_TYPE_LOGLOG_POLYNOMIAL,task-clock_FEATURE_TYPE_LOG_POLYNOMIAL,task-clock_FEATURE_TYPE_POLYNOMIAL,task-clock_FEATURE_TYPE_POWER
0,0.0,12176.27864,5.274472,1.0,354489.841881,31.023435,0,0.0,0.0,0,...,0,1,0,0,0,0,0,0,1,0
1,0.8,12300.866448,140.866882,0.1,360567.282593,278.416553,0,0.0,0.0,0,...,0,0,0,0,0,0,0,1,0,0
2,0.0,12396.648839,38.928139,1.0,358483.822954,81.019933,0,0.0,0.0,0,...,0,0,1,0,0,0,0,0,1,0
3,4.0,12358.582719,-1.1e-05,1.0,355754.502857,256.760672,0,0.0,0.0,0,...,0,0,0,1,0,1,0,0,0,0
4,1.0,12313.598463,3.715744,1.0,354852.459189,327.2899,0,0.0,0.0,0,...,0,1,0,0,0,1,0,0,0,0


In [4]:
train_dataset_features.sort_index()

Unnamed: 0,branch-misses_FEATURE_CONFIG,branch-misses_INTERCEPT,branch-misses_R-VAL,branches_FEATURE_CONFIG,branches_INTERCEPT,branches_R-VAL,context-switches_FEATURE_CONFIG,context-switches_INTERCEPT,context-switches_R-VAL,cpu-migrations_FEATURE_CONFIG,...,stalled-cycles-frontend_FEATURE_TYPE_LOGLOG_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_LOG_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_POWER,task-clock_FEATURE_TYPE_FACTORIAL,task-clock_FEATURE_TYPE_FRACTIONAL_POWER,task-clock_FEATURE_TYPE_LOGLOG_POLYNOMIAL,task-clock_FEATURE_TYPE_LOG_POLYNOMIAL,task-clock_FEATURE_TYPE_POLYNOMIAL,task-clock_FEATURE_TYPE_POWER
0,0.0,12176.278640,5.274472e+00,1.0,354489.841881,31.023435,0,0.0,0.0,0,...,0,1,0,0,0,0,0,0,1,0
1,0.8,12300.866448,1.408669e+02,0.1,360567.282593,278.416553,0,0.0,0.0,0,...,0,0,0,0,0,0,0,1,0,0
2,0.0,12396.648839,3.892814e+01,1.0,358483.822954,81.019933,0,0.0,0.0,0,...,0,0,1,0,0,0,0,0,1,0
3,4.0,12358.582719,-1.096654e-05,1.0,355754.502857,256.760672,0,0.0,0.0,0,...,0,0,0,1,0,1,0,0,0,0
4,1.0,12313.598463,3.715744e+00,1.0,354852.459189,327.289900,0,0.0,0.0,0,...,0,1,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5938,0.0,12467.420365,2.007505e+01,1.0,358499.796265,102.392718,0,0.0,0.0,0,...,0,1,0,0,0,0,0,1,0,0
5940,2.0,12278.039263,2.621763e-14,1.0,355209.559184,256.793758,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,1,0
5942,1.0,12311.611866,3.702682e+00,1.0,354513.610929,884.972592,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,1,0
5947,1.0,12310.832756,4.590031e+00,1.0,354976.541826,664.581813,0,0.0,0.0,0,...,0,1,0,0,0,0,0,0,1,0


In [5]:
test_dataset_features.sort_index()

Unnamed: 0,branch-misses_FEATURE_CONFIG,branch-misses_INTERCEPT,branch-misses_R-VAL,branches_FEATURE_CONFIG,branches_INTERCEPT,branches_R-VAL,context-switches_FEATURE_CONFIG,context-switches_INTERCEPT,context-switches_R-VAL,cpu-migrations_FEATURE_CONFIG,...,stalled-cycles-frontend_FEATURE_TYPE_LOGLOG_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_LOG_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_POLYNOMIAL,stalled-cycles-frontend_FEATURE_TYPE_POWER,task-clock_FEATURE_TYPE_FACTORIAL,task-clock_FEATURE_TYPE_FRACTIONAL_POWER,task-clock_FEATURE_TYPE_LOGLOG_POLYNOMIAL,task-clock_FEATURE_TYPE_LOG_POLYNOMIAL,task-clock_FEATURE_TYPE_POLYNOMIAL,task-clock_FEATURE_TYPE_POWER
8,2.0,12378.326531,-209.163265,0.0,356091.016439,1.403287e+01,0,0.0,0.0,0,...,0,1,0,0,0,0,0,1,0,0
12,1.0,12213.758169,3.625786,1.0,353659.222153,3.252067e+02,0,0.0,0.0,0,...,0,0,0,0,0,1,0,0,0,0
14,0.0,12116.890361,60.779827,1.0,356370.899267,7.348262e+01,0,0.0,0.0,0,...,0,0,0,0,0,1,0,0,0,0
15,1.0,12371.937143,2.987563,1.0,356527.551020,6.530074e+01,0,0.0,0.0,0,...,0,1,0,0,0,0,0,0,0,1
17,0.0,12247.457746,52.703847,1.0,356387.498915,1.423683e+02,0,0.0,0.0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5941,1.0,12314.822883,1.704246,1.0,354975.643034,6.615757e+02,0,0.0,0.0,0,...,0,1,0,0,0,0,0,0,1,0
5943,2.0,12300.387768,-0.000003,4.0,359984.733725,-2.879984e-13,0,0.0,0.0,0,...,0,0,1,0,0,0,0,1,0,0
5944,0.0,12233.320504,75.226898,1.0,359885.417516,2.033561e+03,0,0.0,0.0,0,...,0,0,0,0,0,0,0,1,0,0
5945,0.9,12262.235739,89.969619,2.0,355167.687085,1.331121e-03,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,1,0


In [6]:
normalize = preprocessing.Normalization()
normalize.adapt(train_dataset_features)

model = tf.keras.Sequential([
  normalize,
  layers.Dense(1024),
  layers.Dense(512),
  layers.Dense(512),
  layers.Dense(128),
  layers.Dense(128),
  layers.Dense(64),
  layers.Dense(64),
  layers.Dense(32),
  layers.Dense(32),
  layers.Dense(8),
  layers.Dense(8),  
  layers.Dense(4),
  layers.Dense(2),
  layers.Dense(1, activation='sigmoid')
])

model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
              optimizer = tf.optimizers.Adam(),
              metrics=['accuracy'])

2021-10-22 20:11:39.552349: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-10-22 20:11:39.619270: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


In [7]:
model.fit(train_dataset_features, train_dataset_labels, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x15bbd3430>

In [8]:
train_dataset_features = train.copy()
train_dataset_labels = train.copy().pop('label')

tf.math.confusion_matrix(train_dataset_labels, model.predict(train_dataset_features))

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[3274,    0],
       [   2,  709]], dtype=int32)>

In [9]:


preds = np.array(model.predict(train_dataset_features)).astype(int)

accuracy = accuracy_score(train_dataset_labels.to_numpy(), preds)
print('Accuracy: %f' % accuracy)
precision = precision_score(train_dataset_labels.to_numpy(), preds)
print('Precision: %f' % precision)
recall = recall_score(train_dataset_labels.to_numpy(), preds)
print('Recall: %f' % recall)
f1 = f1_score(train_dataset_labels.to_numpy(), preds)
print('F1 score: %f' % f1)

Accuracy: 0.999498
Precision: 1.000000
Recall: 0.997187
F1 score: 0.998592


In [10]:
tf.math.confusion_matrix(test_dataset_labels, model.predict(test_dataset_features))

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[1661,    2],
       [   2,  299]], dtype=int32)>

In [11]:
preds = np.array(model.predict(test_dataset_features)).astype(int)

accuracy = accuracy_score(test_dataset_labels.to_numpy(), preds)
print('Accuracy: %f' % accuracy)
precision = precision_score(test_dataset_labels.to_numpy(), preds)
print('Precision: %f' % precision)
recall = recall_score(test_dataset_labels.to_numpy(), preds)
print('Recall: %f' % recall)
f1 = f1_score(test_dataset_labels.to_numpy(), preds)
print('F1 score: %f' % f1)

Accuracy: 0.997963
Precision: 0.993355
Recall: 0.993355
F1 score: 0.993355
