In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys # error msg, add the modules

import pandas as pd
import numpy as np
import operator
import copy

sys.path.append("../pycode/")
from magus_util import read_nvprof_trace, parse_nvprof_trace, getruntime, sort_dict_by_val, genNNFeat
from magus_contention import *

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectFromModel

In [2]:
traceFolder = "./test_files"
appTraces = os.listdir(traceFolder)
print appTraces

['binopt_trace.csv', 'sortingNetworks_trace.csv', 'transpose_trace.csv', 'SobolQRNG_trace.csv', 'reduction_trace.csv', 'matrixMul_trace.csv', 'MC_SingleAsianOptionP_trace.csv', 'mergeSort_trace.csv', 'interval_trace.csv', 'quasirandomGenerator_trace.csv', 'convolutionFFT2D_trace.csv', 'radixSortThrust_trace.csv', 'scan_trace.csv']


In [3]:
app_feat_array = None
app_rowid_dict = {}

appTraceDict = {}

for i, eachAppTrace in enumerate(appTraces):
    appName = eachAppTrace[:-10]
    print appName
    file_csv = traceFolder + '/' + eachAppTrace 
    #print file_csv
    df_trace = read_nvprof_trace(file_csv)
    appTraceList = parse_nvprof_trace(df_trace)
    #print appTraceList
    appTraceDict[appName] = appTraceList
    
    if i == 0:
        app_feat_array = genNNFeat(appTraceList)
    else:
        app_feat_array = np.vstack((app_feat_array, genNNFeat(appTraceList)))
        
    app_rowid_dict[appName] = i

binopt
sortingNetworks
transpose
SobolQRNG
reduction
matrixMul
MC_SingleAsianOptionP
mergeSort
interval
quasirandomGenerator
convolutionFFT2D
radixSortThrust
scan


In [4]:
appTrace = appTraceDict['binopt']
appTrace

[['h2d', 566.138617, 566.141849, 0.0, 0.0, 0.0, 0.0],
 ['kern', 566.150169, 575.913912, 1024.0, 128.0, 32.0, 516.0],
 ['d2h', 575.915512, 575.917496, 0.0, 0.0, 0.0, 0.0]]

In [5]:
app_feat_array.shape

(13, 45)

In [6]:
# row index in app_feat_array[]
app_rowid_dict

{'MC_SingleAsianOptionP': 6,
 'SobolQRNG': 3,
 'binopt': 0,
 'convolutionFFT2D': 10,
 'interval': 8,
 'matrixMul': 5,
 'mergeSort': 7,
 'quasirandomGenerator': 9,
 'radixSortThrust': 11,
 'reduction': 4,
 'scan': 12,
 'sortingNetworks': 1,
 'transpose': 2}

### Ground Truth

In [7]:
#
# Measure the performance for the combination of running two applications concurrently the same gpu
#
import json

with open('../00_featSel/contention_tests/combo_truth.json') as json_file:  
    combo_list = json.load(json_file)
    
unique_appName = set()
for eachCombo in combo_list:
    print eachCombo
    unique_appName.add(eachCombo[0])

[u'mergeSort', u'reduction', 1]
[u'mergeSort', u'SobolQRNG', 0]
[u'mergeSort', u'scan', 0]
[u'mergeSort', u'matrixMul', 0]
[u'mergeSort', u'convfft2d', 1]
[u'mergeSort', u'quasirandomGenerator', 1]
[u'mergeSort', u'binopt', 1]
[u'mergeSort', u'interval', 0]
[u'mergeSort', u'MCSingleAsianOptionP', 0]
[u'mergeSort', u'transpose', 0]
[u'mergeSort', u'radixSortThrust', 1]
[u'mergeSort', u'sortingNetworks', 1]
[u'radixSortThrust', u'scan', 0]
[u'radixSortThrust', u'reduction', 0]
[u'radixSortThrust', u'mergeSort', 1]
[u'radixSortThrust', u'interval', 1]
[u'radixSortThrust', u'matrixMul', 1]
[u'radixSortThrust', u'SobolQRNG', 1]
[u'radixSortThrust', u'transpose', 1]
[u'radixSortThrust', u'sortingNetworks', 1]
[u'radixSortThrust', u'binopt', 1]
[u'radixSortThrust', u'convfft2d', 1]
[u'radixSortThrust', u'quasirandomGenerator', 1]
[u'radixSortThrust', u'MCSingleAsianOptionP', 0]
[u'scan', u'MCSingleAsianOptionP', 0]
[u'scan', u'convfft2d', 1]
[u'scan', u'interval', 1]
[u'scan', u'reduction', 0

In [8]:
for key, value in app_rowid_dict.iteritems():
    if key in unique_appName:
        print key
    else:
        print("[Warning] {} is not in the same app name.".format(key))

        
# MC_SingleAsianOptionP -> MCSingleAsianOptionP 
# convolutionFFT2D -> convfft2d

mergeSort
radixSortThrust
scan
SobolQRNG
interval
transpose
matrixMul
quasirandomGenerator
reduction
binopt
sortingNetworks


### Build training set

In [9]:
len(combo_list)

156

In [10]:
X_input = None
y_label = np.zeros(len(combo_list), dtype=np.int32)

count = 0
for eachCombo in combo_list:
    [app1, app2, goodCombo]= eachCombo
    #print app1, app2, goodCombo
    
    if app1 == "MCSingleAsianOptionP": app1 = "MC_SingleAsianOptionP"
    if app1 == "convfft2d":            app1 = "convolutionFFT2D"
    
    if app2 == "MCSingleAsianOptionP": app2 = "MC_SingleAsianOptionP"
    if app2 == "convfft2d":            app2 = "convolutionFFT2D"
        
    row1 = app_rowid_dict[app1]
    row2 = app_rowid_dict[app2]
    
    arr1 = app_feat_array[row1]
    arr2 = app_feat_array[row2]

    currentCombo = np.append(arr1, arr2)
    
    if count == 0:
        X_input = currentCombo
    else:
        X_input = np.vstack((X_input, currentCombo))
        
    y_label[count] = int(goodCombo)
    
    count = count + 1

In [11]:
print X_input.shape
print y_label.shape

(156, 90)
(156,)


In [12]:
print combo_list[0]
print y_label[0]

[u'mergeSort', u'reduction', 1]
1


In [13]:
print combo_list[-1]
print y_label[-1]

[u'sortingNetworks', u'matrixMul', 0]
0


### Build NN Model

In [14]:
y_label

array([1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0,
       1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0], dtype=int32)

In [15]:
X_input.shape

(156, 90)

In [16]:
X_input.shape[0]

156

In [17]:
import numpy as np                                                              
import tensorflow as tf

In [18]:
# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [19]:
reset_graph()

In [20]:
from functools import partial


n_inputs = 90
n_hidden1 = 100
n_hidden2 = 100
n_hidden3 = 100
n_hidden4 = 100
n_hidden5 = 100
n_hidden6 = 100
n_hidden7 = 100
n_hidden8 = 100
n_hidden9 = 100
n_hidden10 = 100
n_outputs = 2 # good (1) or bad (0)


X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

training = tf.placeholder_with_default(False, shape=(), name='training')
batch_norm_momentum = 0.9
learning_rate = 0.01

with tf.name_scope("dnn"):
    he_init = tf.contrib.layers.variance_scaling_initializer()
    
    # avoid repeating the same parameters over and over again
    my_batch_norm_layer = partial(tf.layers.batch_normalization,
            training=training, momentum=batch_norm_momentum)
    
    my_dense_layer = partial(tf.layers.dense, kernel_initializer=he_init) # activeFunc after BN
    
    hidden1 = my_dense_layer(X, n_hidden1, name="hidden1")
    bn1= my_batch_norm_layer(hidden1)
    bn1_act = tf.nn.elu(bn1)
    
    hidden2 = my_dense_layer(bn1_act, n_hidden2, name="hidden2")
    bn2 = my_batch_norm_layer(hidden2)
    bn2_act = tf.nn.elu(bn2)
    
    hidden3 = my_dense_layer(bn2_act, n_hidden3, name="hidden3")
    bn3 = my_batch_norm_layer(hidden3)
    bn3_act = tf.nn.elu(bn3)

    hidden4 = my_dense_layer(bn3_act, n_hidden4, name="hidden4")
    bn4_act = tf.nn.elu(my_batch_norm_layer(hidden4))
    
    hidden5 = my_dense_layer(bn4_act, n_hidden5, name="hidden5")
    bn5_act = tf.nn.elu(my_batch_norm_layer(hidden5))
    
    hidden6 = my_dense_layer(bn5_act, n_hidden6, name="hidden6")
    bn6_act = tf.nn.elu(my_batch_norm_layer(hidden6))
    
    hidden7 = my_dense_layer(bn6_act, n_hidden7, name="hidden7")
    bn7_act = tf.nn.elu(my_batch_norm_layer(hidden7))
    
    hidden8 = my_dense_layer(bn7_act, n_hidden8, name="hidden8")
    bn8_act = tf.nn.elu(my_batch_norm_layer(hidden8))
    
    hidden9 = my_dense_layer(bn8_act, n_hidden9, name="hidden9")
    bn9_act = tf.nn.elu(my_batch_norm_layer(hidden9))
    
    hidden10 = my_dense_layer(bn9_act, n_hidden10, name="hidden10")
    bn10_act = tf.nn.elu(my_batch_norm_layer(hidden10))
    
    
    logits_before_bn = my_dense_layer(bn10_act, n_outputs, name="outputs")
    logits = my_batch_norm_layer(logits_before_bn)

    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 100
batch_size = 64

### train NN

In [21]:
def next_batch(X_input, y_label, batch_size):
    samples = X_input.shape[0]
    idx = np.arange(0 , samples)
    np.random.shuffle(idx)
    idx = idx[:batch_size]
    
    input_shuffle = [X_input[i,:] for i in idx]
    label_shuffle = [y_label[i] for i in idx]
    
    return np.asarray(input_shuffle), np.asarray(label_shuffle)
    
    
# X_batch, y_batch = next_batch(X_input, y_label, batch_size)
# print y_batch

In [22]:
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(X_input.shape[0] // batch_size):
            X_batch, y_batch = next_batch(X_input, y_label, batch_size)
            sess.run([training_op, extra_update_ops],
                     feed_dict={training: True, X: X_batch, y: y_batch})
            
        if epoch % 5 == 0:
            acc_train = accuracy.eval(feed_dict={X: X_batch,
                                                 y: y_batch})
            
            print(epoch, "Batch accuracy:", acc_train)

    # save the model
    save_path = saver.save(sess, "./my_model_final.ckpt")

(0, 'Batch accuracy:', 0.453125)
(5, 'Batch accuracy:', 0.6875)
(10, 'Batch accuracy:', 0.734375)
(15, 'Batch accuracy:', 0.8125)
(20, 'Batch accuracy:', 0.78125)
(25, 'Batch accuracy:', 0.84375)
(30, 'Batch accuracy:', 0.765625)
(35, 'Batch accuracy:', 0.8125)
(40, 'Batch accuracy:', 0.890625)
(45, 'Batch accuracy:', 0.9375)
(50, 'Batch accuracy:', 0.921875)
(55, 'Batch accuracy:', 0.96875)
(60, 'Batch accuracy:', 0.96875)
(65, 'Batch accuracy:', 0.9375)
(70, 'Batch accuracy:', 0.890625)
(75, 'Batch accuracy:', 0.984375)
(80, 'Batch accuracy:', 0.984375)
(85, 'Batch accuracy:', 0.96875)
(90, 'Batch accuracy:', 0.984375)
(95, 'Batch accuracy:', 0.984375)


### Restore trained model

In [23]:
with tf.Session() as sess:
    saver.restore(sess, "./my_model_final.ckpt")
    logits_pred, correct_pred = sess.run([logits, correct], feed_dict={X: X_input, y: y_label})
    print logits_pred
    print correct_pred

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
[[-0.6312139   0.14674455]
 [ 0.57953531 -0.41844293]
 [ 1.37607181 -1.09437823]
 [ 1.4443891  -0.7199074 ]
 [-1.26798546  1.58939111]
 [-0.68992567 -0.03102233]
 [ 0.33009899  0.60890764]
 [ 1.35724425 -1.06571829]
 [ 0.29175171 -0.40774921]
 [ 0.61251342 -1.84418297]
 [-1.5248301   1.0131954 ]
 [-1.73923135  1.88429785]
 [ 1.00557637 -1.5489434 ]
 [ 0.62291873 -0.8557173 ]
 [-1.07816327  1.57876408]
 [-1.21822333  1.52735198]
 [-1.15842497  0.78076732]
 [-0.51624525  1.05309653]
 [-1.35001647  1.8309375 ]
 [-1.41794693  1.36636734]
 [-1.35521913  2.25999236]
 [-1.03074324  1.59340489]
 [-0.35610935  0.52346599]
 [ 0.24891677 -0.51751149]
 [ 0.66453087  0.40684631]
 [-1.09040761  1.11814702]
 [-0.68408132  0.64214128]
 [ 1.40658522  0.12500617]
 [ 1.38920653 -1.63846767]
 [ 0.67208934 -1.68575513]
 [-0.81189919  0.96679223]
 [-0.60230434  1.48716152]
 [ 0.75524688 -1.46280825]
 [ 1.87033021 -1.14237344]
 [-1.01619339  1.7