In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys # error msg, add the modules

import pandas as pd
import numpy as np
import operator
import copy

sys.path.append("../pycode/")
from magus_util import read_nvprof_trace, parse_nvprof_trace, getruntime, sort_dict_by_val, genNNFeat
from magus_contention import *

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectFromModel

from datetime import datetime

### obtain trace files

In [2]:
#
# 13 cuda apps
#
traceFolder = "./test_files"
appTraces = os.listdir(traceFolder)
print appTraces
print len(appTraces)

['binopt_trace.csv', 'sortingNetworks_trace.csv', 'transpose_trace.csv', 'SobolQRNG_trace.csv', 'reduction_trace.csv', 'matrixMul_trace.csv', 'MC_SingleAsianOptionP_trace.csv', 'mergeSort_trace.csv', 'interval_trace.csv', 'quasirandomGenerator_trace.csv', 'convolutionFFT2D_trace.csv', 'radixSortThrust_trace.csv', 'scan_trace.csv']
13


In [3]:
#
# additional 10 apps 
#
traceFolder_extra = "./test_apps_trace"
appTraces_extra = os.listdir(traceFolder_extra)
print appTraces_extra
print len(appTraces_extra)

['FDTD3d_trace.csv', 'convolutionSeparable_trace.csv', 'dct8x8_trace.csv', 'nvgraph_Pagerank_trace.csv', 'histogram_trace.csv', 'batchCUBLAS_trace.csv', 'simpleCUFFT_callback_trace.csv', 'conjugateGradient_trace.csv', 'boxFilterNPP_trace.csv', 'BlackScholes_trace.csv']
10


### generate feature matrix based on traces

In [4]:
app_feat_array = None
app_rowid_dict = {}
appTraceDict = {}

#
# 13 cuda apps
#
print('\n 13 cuda apps \n')
for i, eachAppTrace in enumerate(appTraces):
    appName = eachAppTrace[:-10]
    print appName
    file_csv = traceFolder + '/' + eachAppTrace 
    #print file_csv
    df_trace = read_nvprof_trace(file_csv)
    appTraceList = parse_nvprof_trace(df_trace)
    #print appTraceList
    appTraceDict[appName] = appTraceList
    
    if i == 0:
        app_feat_array = genNNFeat(appTraceList)
    else:
        app_feat_array = np.vstack((app_feat_array, genNNFeat(appTraceList)))
        
    #print i
    app_rowid_dict[appName] = i

#
# additional 10 apps 
#   
print('\n extra 10 cuda apps \n')
for i, eachAppTrace in enumerate(appTraces_extra):
    appName = eachAppTrace[:-10]
    print appName
    
    file_csv = traceFolder_extra + '/' + eachAppTrace 
    #print file_csv
    
    df_trace = read_nvprof_trace(file_csv)
    appTraceList = parse_nvprof_trace(df_trace)
    #print appTraceList
    
    appTraceDict[appName] = appTraceList
    
    app_feat_array = np.vstack((app_feat_array, genNNFeat(appTraceList)))
     
    #print i + 13
    app_rowid_dict[appName] = i


 13 cuda apps 

binopt
sortingNetworks
transpose
SobolQRNG
reduction
matrixMul
MC_SingleAsianOptionP
mergeSort
interval
quasirandomGenerator
convolutionFFT2D
radixSortThrust
scan

 extra 10 cuda apps 

FDTD3d
convolutionSeparable
dct8x8
nvgraph_Pagerank
histogram
batchCUBLAS
simpleCUFFT_callback
conjugateGradient
boxFilterNPP
BlackScholes


### simple verification

In [5]:
appTrace = appTraceDict['binopt']
appTrace

[['h2d', 566.138617, 566.141849, 0.0, 0.0, 0.0, 0.0],
 ['kern', 566.150169, 575.913912, 1024.0, 128.0, 32.0, 516.0],
 ['d2h', 575.915512, 575.917496, 0.0, 0.0, 0.0, 0.0]]

In [6]:
appTrace = appTraceDict['FDTD3d']
appTrace

[['h2d', 12137.205, 12703.343918, 0.0, 0.0, 0.0, 0.0],
 ['h2d', 12703.43, 13269.573654, 0.0, 0.0, 0.0, 0.0],
 ['h2d', 13269.586000000001, 13269.587184000002, 0.0, 0.0, 0.0, 0.0],
 ['kern', 13269.617999999999, 13272.026054999998, 288.0, 512.0, 40.0, 3750.0],
 ['kern', 13272.027, 13274.409998, 288.0, 512.0, 40.0, 3750.0],
 ['kern', 13274.412, 13276.803031, 288.0, 512.0, 40.0, 3750.0],
 ['kern', 13276.805, 13279.185374, 288.0, 512.0, 40.0, 3750.0],
 ['kern', 13279.187, 13281.584623, 288.0, 512.0, 40.0, 3750.0],
 ['d2h', 13281.599999999999, 13823.284344999998, 0.0, 0.0, 0.0, 0.0]]

In [7]:
app_feat_array.shape

(23, 45)

In [8]:
# row index in app_feat_array[]
app_rowid_dict

{'BlackScholes': 9,
 'FDTD3d': 0,
 'MC_SingleAsianOptionP': 6,
 'SobolQRNG': 3,
 'batchCUBLAS': 5,
 'binopt': 0,
 'boxFilterNPP': 8,
 'conjugateGradient': 7,
 'convolutionFFT2D': 10,
 'convolutionSeparable': 1,
 'dct8x8': 2,
 'histogram': 4,
 'interval': 8,
 'matrixMul': 5,
 'mergeSort': 7,
 'nvgraph_Pagerank': 3,
 'quasirandomGenerator': 9,
 'radixSortThrust': 11,
 'reduction': 4,
 'scan': 12,
 'simpleCUFFT_callback': 6,
 'sortingNetworks': 1,
 'transpose': 2}

### read the ground truth files

In [9]:
#
# Measure the performance for the combination of running two applications concurrently the same gpu
#
import json

#
# 13 cuda apps
#
with open('../00_featSel/contention_tests/combo_truth.json') as json_file:  
    combo_list = json.load(json_file)
    
unique_appName = set()
for eachCombo in combo_list:
    print eachCombo
    unique_appName.add(eachCombo[0])

#
# 10 extra cuda apps
#
with open('./combo_truth_testApps.json') as json_file:  
    combo_list_extra = json.load(json_file)
    
for eachCombo in combo_list_extra:
    print eachCombo
    unique_appName.add(eachCombo[0])

[u'mergeSort', u'reduction', 1]
[u'mergeSort', u'SobolQRNG', 0]
[u'mergeSort', u'scan', 0]
[u'mergeSort', u'matrixMul', 0]
[u'mergeSort', u'convfft2d', 1]
[u'mergeSort', u'quasirandomGenerator', 1]
[u'mergeSort', u'binopt', 1]
[u'mergeSort', u'interval', 0]
[u'mergeSort', u'MCSingleAsianOptionP', 0]
[u'mergeSort', u'transpose', 0]
[u'mergeSort', u'radixSortThrust', 1]
[u'mergeSort', u'sortingNetworks', 1]
[u'radixSortThrust', u'scan', 0]
[u'radixSortThrust', u'reduction', 0]
[u'radixSortThrust', u'mergeSort', 1]
[u'radixSortThrust', u'interval', 1]
[u'radixSortThrust', u'matrixMul', 1]
[u'radixSortThrust', u'SobolQRNG', 1]
[u'radixSortThrust', u'transpose', 1]
[u'radixSortThrust', u'sortingNetworks', 1]
[u'radixSortThrust', u'binopt', 1]
[u'radixSortThrust', u'convfft2d', 1]
[u'radixSortThrust', u'quasirandomGenerator', 1]
[u'radixSortThrust', u'MCSingleAsianOptionP', 0]
[u'scan', u'MCSingleAsianOptionP', 0]
[u'scan', u'convfft2d', 1]
[u'scan', u'interval', 1]
[u'scan', u'reduction', 0

In [10]:
#
# print out unique apps
#
for key, value in app_rowid_dict.iteritems():
    if key in unique_appName:
        print key
    else:
        print("[Warning] {} is not in the same app name.".format(key))

boxFilterNPP
scan
convolutionSeparable
mergeSort
SobolQRNG
matrixMul
BlackScholes
reduction
binopt
sortingNetworks
FDTD3d
transpose
histogram
quasirandomGenerator
conjugateGradient
dct8x8
batchCUBLAS
interval
radixSortThrust


### buid the data set

In [11]:
X_input = None
y_label = np.zeros(len(combo_list) + len(combo_list_extra), dtype=np.int32)

#
# 13 cuda apps
#
count = 0
for eachCombo in combo_list:
    [app1, app2, goodCombo]= eachCombo
    #print app1, app2, goodCombo
    
    if app1 == "MCSingleAsianOptionP": app1 = "MC_SingleAsianOptionP"
    if app1 == "convfft2d":            app1 = "convolutionFFT2D"
    
    if app2 == "MCSingleAsianOptionP": app2 = "MC_SingleAsianOptionP"
    if app2 == "convfft2d":            app2 = "convolutionFFT2D"
        
    row1 = app_rowid_dict[app1]
    row2 = app_rowid_dict[app2]
    
    arr1 = app_feat_array[row1] # read feature vector for the app
    arr2 = app_feat_array[row2]

    currentCombo = np.append(arr1, arr2)
    
    if count == 0:
        X_input = currentCombo
    else:
        X_input = np.vstack((X_input, currentCombo))
        
    y_label[count] = int(goodCombo)
    
    count = count + 1


#
# extra 10 apps
#
for eachCombo in combo_list_extra:
    [app1, app2, goodCombo]= eachCombo
    #print app1, app2, goodCombo
    
    if app1 == "nvgraphPagerank":                app1 = "nvgraph_Pagerank"
    if app1 == "simpleCUFFTcallback":            app1 = "simpleCUFFT_callback"
    
    if app2 == "nvgraphPagerank":                app2 = "nvgraph_Pagerank"
    if app2 == "simpleCUFFTcallback":            app2 = "simpleCUFFT_callback"
        
    row1 = app_rowid_dict[app1]
    row2 = app_rowid_dict[app2]
    
    arr1 = app_feat_array[row1]
    arr2 = app_feat_array[row2]

    currentCombo = np.append(arr1, arr2)
    
    X_input = np.vstack((X_input, currentCombo))
        
    y_label[count] = int(goodCombo)
    
    count = count + 1


### check X_input and y_label

In [12]:
print X_input.shape
print y_label.shape

(246, 90)
(246,)


In [13]:
# print combo_list[0]
# print y_label[0]

### Build NN Model

In [14]:
y_label

array([1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0,
       1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

In [15]:
X_input.shape

(246, 90)

In [16]:
import numpy as np                                                              
import tensorflow as tf

In [17]:
# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [18]:
reset_graph()

In [19]:
from functools import partial

n_inputs = 90
n_hidden = 500    # tunning the layer size here!!!
n_outputs = 2    # good (1) or bad (0)


X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

training = tf.placeholder_with_default(False, shape=(), name='training')
batch_norm_momentum = 0.9
learning_rate = 0.01

with tf.name_scope("dnn"):
    he_init = tf.contrib.layers.variance_scaling_initializer()
    
    # avoid repeating the same parameters over and over again
    my_batch_norm_layer = partial(tf.layers.batch_normalization,
            training=training, momentum=batch_norm_momentum)
    
    my_dense_layer = partial(tf.layers.dense, kernel_initializer=he_init) # activeFunc after BN
    
    hidden1 = my_dense_layer(X, n_hidden, name="hidden1")
    bn1= my_batch_norm_layer(hidden1)
    bn1_act = tf.nn.elu(bn1)
    
    hidden2 = my_dense_layer(bn1_act, n_hidden, name="hidden2")
    bn2 = my_batch_norm_layer(hidden2)
    bn2_act = tf.nn.elu(bn2)
    
    hidden3 = my_dense_layer(bn2_act, n_hidden, name="hidden3")
    bn3 = my_batch_norm_layer(hidden3)
    bn3_act = tf.nn.elu(bn3)

    hidden4 = my_dense_layer(bn3_act, n_hidden, name="hidden4")
    bn4_act = tf.nn.elu(my_batch_norm_layer(hidden4))
    
    hidden5 = my_dense_layer(bn4_act, n_hidden, name="hidden5")
    bn5_act = tf.nn.elu(my_batch_norm_layer(hidden5))
    
    hidden6 = my_dense_layer(bn5_act, n_hidden, name="hidden6")
    bn6_act = tf.nn.elu(my_batch_norm_layer(hidden6))
    
    hidden7 = my_dense_layer(bn6_act, n_hidden, name="hidden7")
    bn7_act = tf.nn.elu(my_batch_norm_layer(hidden7))
    
    hidden8 = my_dense_layer(bn7_act, n_hidden, name="hidden8")
    bn8_act = tf.nn.elu(my_batch_norm_layer(hidden8))
    
    hidden9 = my_dense_layer(bn8_act, n_hidden, name="hidden9")
    bn9_act = tf.nn.elu(my_batch_norm_layer(hidden9))
    
    hidden10 = my_dense_layer(bn9_act, n_hidden, name="hidden10")
    bn10_act = tf.nn.elu(my_batch_norm_layer(hidden10))
    
    # add extra 10 layer
    hidden11 = my_dense_layer(bn10_act, n_hidden, name="hidden11")
    bn11_act = tf.nn.elu(my_batch_norm_layer(hidden11))
    
    hidden12 = my_dense_layer(bn11_act, n_hidden, name="hidden12")
    bn12_act = tf.nn.elu(my_batch_norm_layer(hidden12))
    
    hidden13 = my_dense_layer(bn12_act, n_hidden, name="hidden13")
    bn13_act = tf.nn.elu(my_batch_norm_layer(hidden13))
    
    hidden14 = my_dense_layer(bn13_act, n_hidden, name="hidden14")
    bn14_act = tf.nn.elu(my_batch_norm_layer(hidden14))
    
    hidden15 = my_dense_layer(bn14_act, n_hidden, name="hidden15")
    bn15_act = tf.nn.elu(my_batch_norm_layer(hidden15))
    
    hidden16 = my_dense_layer(bn15_act, n_hidden, name="hidden16")
    bn16_act = tf.nn.elu(my_batch_norm_layer(hidden16))
    
    hidden17 = my_dense_layer(bn16_act, n_hidden, name="hidden17")
    bn17_act = tf.nn.elu(my_batch_norm_layer(hidden17))
    
    hidden18 = my_dense_layer(bn17_act, n_hidden, name="hidden18")
    bn18_act = tf.nn.elu(my_batch_norm_layer(hidden18))
    
    hidden19 = my_dense_layer(bn18_act, n_hidden, name="hidden19")
    bn19_act = tf.nn.elu(my_batch_norm_layer(hidden19))
    
    hidden20 = my_dense_layer(bn19_act, n_hidden, name="hidden20")
    bn20_act = tf.nn.elu(my_batch_norm_layer(hidden20))
    
    
    # add extra 10 layer
    hidden21 = my_dense_layer(bn20_act, n_hidden, name="hidden21")
    bn21_act = tf.nn.elu(my_batch_norm_layer(hidden21))
    
    hidden22 = my_dense_layer(bn21_act, n_hidden, name="hidden22")
    bn22_act = tf.nn.elu(my_batch_norm_layer(hidden22))
    
    hidden23 = my_dense_layer(bn22_act, n_hidden, name="hidden23")
    bn23_act = tf.nn.elu(my_batch_norm_layer(hidden23))
    
    hidden24 = my_dense_layer(bn23_act, n_hidden, name="hidden24")
    bn24_act = tf.nn.elu(my_batch_norm_layer(hidden24))
    
    hidden25 = my_dense_layer(bn24_act, n_hidden, name="hidden25")
    bn25_act = tf.nn.elu(my_batch_norm_layer(hidden25))
    
    hidden26 = my_dense_layer(bn25_act, n_hidden, name="hidden26")
    bn26_act = tf.nn.elu(my_batch_norm_layer(hidden26))
    
    hidden27 = my_dense_layer(bn26_act, n_hidden, name="hidden27")
    bn27_act = tf.nn.elu(my_batch_norm_layer(hidden27))
    
    hidden28 = my_dense_layer(bn27_act, n_hidden, name="hidden28")
    bn28_act = tf.nn.elu(my_batch_norm_layer(hidden28))
    
    hidden29 = my_dense_layer(bn28_act, n_hidden, name="hidden29")
    bn29_act = tf.nn.elu(my_batch_norm_layer(hidden29))
    
    hidden30 = my_dense_layer(bn29_act, n_hidden, name="hidden30")
    bn30_act = tf.nn.elu(my_batch_norm_layer(hidden30))
    
    logits_before_bn = my_dense_layer(bn30_act, n_outputs, name="outputs")
    logits = my_batch_norm_layer(logits_before_bn)
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()


In [20]:
n_epochs = 1000
batch_size = 96

total_samples = X_input.shape[0]
n_batches = int(np.ceil(total_samples / batch_size))
print 'total_samples = %d , n_batches = %d' % (total_samples, n_batches)

total_samples = 246 , n_batches = 2


### train NN

In [21]:
def next_batch(X_input, y_label, batch_size):
    samples = X_input.shape[0]
    idx = np.arange(0 , samples)
    np.random.shuffle(idx)
    idx = idx[:batch_size]
    
    input_shuffle = [X_input[i,:] for i in idx]
    label_shuffle = [y_label[i] for i in idx]
    
    return np.asarray(input_shuffle), np.asarray(label_shuffle)
    
### test
X_batch, y_batch = next_batch(X_input, y_label, batch_size)
print y_batch

[0 1 1 1 0 1 1 1 0 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
 1 0 0 1 1 1 1 1 1 0 1 0 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 0 1 1 0
 0 1 0 0 1 0 0 1 1 0 1 0 1 1 1 0 1 0 0 1 1 0]


In [22]:
#
# log for tensorboard
#
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

loss_summary = tf.summary.scalar('loss', loss)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [23]:
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

BestAcc = 0.0

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(n_batches): # go through all the batches
            # 1)  generate batch
            X_batch, y_batch = next_batch(X_input, y_label, batch_size)
            
            # 2) save the loss (at the 5th batch)
            if iteration % 2 == 0:
                summary_str = loss_summary.eval(feed_dict={X: X_batch, y: y_batch})  # save the loss func values
                step = epoch * n_batches + iteration
                file_writer.add_summary(summary_str, step)
                
            # 3) run the training
            sess.run([training_op, extra_update_ops],
                     feed_dict={training: True, X: X_batch, y: y_batch})
            
            
        # check the accuracy every 5 epoches
        if epoch % 5 == 0:
            acc_train = accuracy.eval(feed_dict={X: X_batch,
                                                 y: y_batch})
            print(epoch, "Batch accuracy:", acc_train)
            
            if acc_train > BestAcc:
                BestAcc = acc_train
                
            if acc_train > 0.92:
                # save the model
                print('=> Found a good model at accuracy : {}   (Saving the model ... )'.format(acc_train))
                save_path = saver.save(sess, "./dinn_final.ckpt")

                
print("Best Accuracy Achieved : {}".format(BestAcc))

#
# end logging
# 
file_writer.close()

(0, 'Batch accuracy:', 0.48958334)
(5, 'Batch accuracy:', 0.67708331)
(10, 'Batch accuracy:', 0.79166669)
(15, 'Batch accuracy:', 0.80208331)
(20, 'Batch accuracy:', 0.77083331)
(25, 'Batch accuracy:', 0.8125)
(30, 'Batch accuracy:', 0.79166669)
(35, 'Batch accuracy:', 0.84375)
(40, 'Batch accuracy:', 0.79166669)
(45, 'Batch accuracy:', 0.78125)
(50, 'Batch accuracy:', 0.80208331)
(55, 'Batch accuracy:', 0.82291669)
(60, 'Batch accuracy:', 0.86458331)
(65, 'Batch accuracy:', 0.84375)
(70, 'Batch accuracy:', 0.83333331)
(75, 'Batch accuracy:', 0.83333331)
(80, 'Batch accuracy:', 0.88541669)
(85, 'Batch accuracy:', 0.88541669)
(90, 'Batch accuracy:', 0.78125)
(95, 'Batch accuracy:', 0.79166669)
(100, 'Batch accuracy:', 0.83333331)
(105, 'Batch accuracy:', 0.8125)
(110, 'Batch accuracy:', 0.80208331)
(115, 'Batch accuracy:', 0.79166669)
(120, 'Batch accuracy:', 0.83333331)
(125, 'Batch accuracy:', 0.88541669)
(130, 'Batch accuracy:', 0.83333331)
(135, 'Batch accuracy:', 0.79166669)
(140, 

### Restore trained model

In [24]:
with tf.Session() as sess:
    saver.restore(sess, "./dinn_final.ckpt")
    logits_pred, correct_pred = sess.run([logits, correct], feed_dict={X: X_input, y: y_label})
    print logits_pred[0]
    print y_label[0]
    print correct_pred[0]

INFO:tensorflow:Restoring parameters from ./dinn_final.ckpt
[-1.46900022  1.37378049]
1
True


In [25]:
# a = X_input[0,:]
# a.shape

In [26]:
# # b = np.transpose(a)
# b = a.T
# b.shape

In [27]:
# b = np.reshape(a, (1,90))
# b.shape

In [28]:
# b

In [29]:
# y_label[0]

In [30]:
# with tf.Session() as sess:
#     saver.restore(sess, "./model_final_30hdly_500lysize_bah128.ckpt")
#     logits_pred = sess.run([logits, correct], feed_dict={X: b, y: y_label[0]})
#     print logits_pred