# In [7]:
#%%
import os
import sys
import re
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import openpyxl
import pandas as pd
import math
import itertools
import matplotlib.patches as patches
import seaborn as sns
from decimal import Decimal
import glob
import csv
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import sklearn.svm 
import sklearn.metrics
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neural_network import MLPClassifier
import tensorflow as tf
from tensorflow.keras import layers
import pickle
import timeit


# Outcome tallies updated in place by load_kernel_file / load_transfer_file:
#   [0] file had the expected columns but no rows
#   [1] file had at most max_mem_len rows
#   [2] file had more than max_mem_len rows
#   [3] file had an unexpected number of columns
counts = [0] * 4

#%%

def get_app_list(fileName):
    """Read the application list from a comma-separated text file.

    Lines starting with ``#`` and blank (whitespace-only) lines are skipped.
    Every other line is split on ``,``; only the first two fields are used,
    each stripped of surrounding whitespace. A non-blank line with fewer than
    two fields raises ``IndexError``.

    Arguments:
        fileName {string} -- path of the application list file

    Returns:
        apps {list} -- one ``[app, app_num]`` pair of strings per entry,
                       in file order
    """
    apps = []
    # The with-block closes the file; no explicit close() is needed.
    with open(fileName) as f:
        for line in f:
            # A blank line (e.g. a trailing newline at the end of the file)
            # has no second field and would otherwise raise IndexError.
            if not line.strip() or line.startswith('#'):
                continue
            words = line.strip().split(',')
            apps.append([words[0].strip(), words[1].strip()])
    return apps


def load_kernel_file(filepath):
    """Load a kernel trace CSV and return its first ``max_mem_len`` rows.

    Reads the module-level ``max_mem_len`` and updates the module-level
    ``counts`` list in place.

    Arguments:
        filepath {string} -- path of the kernel CSV file

    Returns:
        {numpy.ndarray or None} -- ``max_mem_len`` rows by 5 columns, or None
        when the file is missing or does not have exactly 5 columns

    Outcomes:
        * missing file: the path is printed, None is returned
        * 5 columns, no rows: ``counts[0]`` is bumped and a single row of -1
          is inserted, then repeated to fill the array
        * 5 columns, at most ``max_mem_len`` rows: ``counts[1]`` is bumped and
          the rows are repeated until there are enough
        * 5 columns, more rows: ``counts[2]`` is bumped, extra rows dropped
        * any other column count: ``counts[3]`` is bumped, None is returned
    """
    if not os.path.exists(filepath):
        print(filepath)
        return None

    frame = pd.read_csv(filepath)
    # Null cells are only reported; the data is still used.
    if frame.isnull().values.any():
        print("kernel", filepath)

    n_rows, n_cols = frame.shape
    if n_cols != 5:
        counts[3] += 1
        print("1kernel", filepath)
        return None

    if n_rows == 0:
        print("0kernel", filepath)
        counts[0] += 1
        # Placeholder row so the padding loop below has something to repeat.
        frame.loc[0] = [-1, -1, -1, -1, -1]
    elif n_rows <= max_mem_len:
        counts[1] += 1
    else:
        counts[2] += 1

    # Double the frame until it is tall enough (a no-op for long files),
    # then keep exactly the first max_mem_len rows.
    while frame.shape[0] < max_mem_len:
        frame = pd.concat([frame, frame])
    return frame.values[:max_mem_len, :]

def load_transfer_file(filepath):
    """Load a transfer trace CSV and return its first ``max_mem_len`` rows.

    Reads the module-level ``max_mem_len`` and updates the module-level
    ``counts`` list in place.

    Arguments:
        filepath {string} -- path of the transfer CSV file

    Returns:
        {numpy.ndarray or None} -- ``max_mem_len`` rows by 4 columns, or None
        when the file is missing, has no rows, or does not have exactly
        4 columns

    Outcomes:
        * missing file: a message is printed, None is returned
        * 4 columns, no rows: ``counts[0]`` is bumped, None is returned
        * 4 columns, at most ``max_mem_len`` rows: ``counts[1]`` is bumped and
          the rows are repeated until there are enough
        * 4 columns, more rows: ``counts[2]`` is bumped, extra rows dropped
        * any other column count: ``counts[3]`` is bumped, None is returned
    """
    if not os.path.exists(filepath):
        print("transfer", filepath)
        return None

    table = pd.read_csv(filepath)
    # Null cells are only reported; the data is still used.
    if table.isnull().values.any():
        print(filepath)

    row_count, col_count = table.shape
    if col_count != 4:
        counts[3] += 1
        print("1transfer", filepath)
        return None
    if row_count == 0:
        print("0transfer", filepath)
        counts[0] += 1
        return None

    counts[1 if row_count <= max_mem_len else 2] += 1

    # Double the table until it is tall enough (a no-op for long files),
    # then keep exactly the first max_mem_len rows.
    while table.shape[0] < max_mem_len:
        table = pd.concat([table, table])
    return table.values[:max_mem_len, :]
        
def load_res_file(filepath, max_res_len):
    """Load a 4-column CSV and return its first ``max_res_len`` rows.

    Arguments:
        filepath {string} -- path of the CSV file
        max_res_len {int} -- number of rows to keep; the file must have at
                             least this many

    Returns:
        {numpy.ndarray or None} -- ``max_res_len`` rows by 4 columns, or None
        (after printing ``"wrong"`` and the path) when the file does not have
        exactly 4 columns or has fewer than ``max_res_len`` rows
    """
    samples = pd.read_csv(filepath)
    n_rows, n_cols = samples.shape
    if n_cols != 4 or n_rows < max_res_len:
        print("wrong", filepath)
        return None
    return samples.values[:max_res_len, :]

def load_group(arch, sample_rate):
    """Load the kernel, transfer and resource traces of every listed app.

    Walks the two categories ``"mybench"`` (label 0) and ``"risky"``
    (label 1), reads each category's application list with ``get_app_list``
    and loads three CSV files per application. Reads the module-level
    ``max_res_len`` for the resource files.

    Arguments:
        arch {string} -- architecture directory name used in the paths
        sample_rate {int} -- sampling rate used in the results directory name

    Returns:
        {tuple} -- ``(kernel array, transfer array, resource array, y_label)``
        where the arrays come from ``np.asarray`` over the per-app loader
        results and ``y_label`` is a list with one category index per app
    """
    kernel_traces, transfer_traces, res_traces = [], [], []
    y_label = []

    for label, category in enumerate(["mybench", "risky"]):
        mem_pathfolder = '/home/pzou/projects/Power_Signature/results_backup/%s/%s/mem_trace-combine'%(category, arch)
        res_pathfolder = '/home/pzou/projects/Power_Signature/results-%d/%s/%s/power-combine'%(sample_rate, category, arch)
        app_list = get_app_list("/home/pzou/projects/Power_Signature/Scripts/applications-mem_%s.csv"%(category))

        for app, num in app_list:
            # This app is excluded on the k40 architecture.
            if arch == "k40" and "reductionMultiBlockCG" in app:
                continue

            stem = app + num
            kernel_path = os.path.join(mem_pathfolder, stem + "_kernel.csv")
            kernel_traces.append(load_kernel_file(kernel_path))

            transfer_path = os.path.join(mem_pathfolder, stem + "_transfer.csv")
            transfer_traces.append(load_transfer_file(transfer_path))

            res_path = os.path.join(res_pathfolder, stem + "_int.csv")
            res_traces.append(load_res_file(res_path, max_res_len))

            y_label.append(label)

    return (
        np.asarray(kernel_traces),
        np.asarray(transfer_traces),
        np.asarray(res_traces),
        y_label,
    )

def rnnClassify(data_kernel_group, data_transfer_group, data_res_group, y_label,  arch, model_eval, ratio):
    """Load the saved "Fusion" Keras model, evaluate it and predict with it.

    For each of the first two samples (index 0 and 1) the model is loaded
    from ``<arch>/Fusion-<arch>-<model_eval>-1-W<ratio>.hdf5``, evaluated on
    that single sample, and then asked to predict on the full data set.
    Nothing is returned; the results are local to each iteration.

    Arguments:
        data_kernel_group {numpy.ndarray} -- kernel traces, indexed by sample
        data_transfer_group {numpy.ndarray} -- transfer traces, indexed by sample
        data_res_group {numpy.ndarray} -- resource traces, indexed by sample
        y_label {list} -- one label per sample
        arch {string} -- architecture name; also the checkpoint directory
        model_eval {string} -- evaluation tag used in the checkpoint file name
        ratio {int} -- value used in the ``W%d`` part of the checkpoint file name

    Returns:
        None
    """
    y_label = pd.Series(y_label)

    fileM = "Fusion"

    for idx in range(2):
        test_index = [idx]
        y_test = y_label[test_index]
        X_test = [
            data_kernel_group[test_index],
            data_transfer_group[test_index],
            data_res_group[test_index],
        ]

        # NOTE(review): the path does not depend on idx, so the same checkpoint
        # is reloaded on every pass -- presumably deliberate so the caller's
        # timing includes model loading; confirm before hoisting.
        checkpoint_path = "%s/%s-%s-%s-%d-W%d.hdf5"%(arch,fileM ,arch, model_eval, 1, ratio )

        model = tf.keras.models.load_model(checkpoint_path)
        # Unpacking assumes the model reports exactly loss plus one metric.
        loss, accuracy = model.evaluate(x=X_test, y=y_test)

        y_prob = model.predict([data_kernel_group, data_transfer_group, data_res_group])
        # Threshold all probabilities at once. Calling int() on each size-1 row
        # of a 2-D prediction array is deprecated since NumPy 1.25.
        # Assumes one probability per sample -- TODO confirm the output shape.
        y_predict = (np.asarray(y_prob).ravel() >= 0.5).astype(int).tolist()
        




if __name__=="__main__":
    # Run configuration. max_mem_len and max_res_len are read as module-level
    # names by the loader functions, so they must exist before load_group runs.
    arch = "p100"
    sample_rate = 100
    max_mem_len = 64
    rate_len = {100: 1800, 50: 3600, 10: 3600}
    max_res_len = rate_len[sample_rate]
    model_eval = "seen"
    ratio = 8

    print("start loading")
    (data_kernel_group, data_transfer_group,
     data_res_group, y_label) = load_group(arch, sample_rate)
    print("Done loading")

    # Time only the model loading/evaluation/prediction step, not the CSV loading.
    start = timeit.default_timer()
    rnnClassify(data_kernel_group, data_transfer_group, data_res_group,
                y_label, arch, model_eval, ratio)
    stop = timeit.default_timer()
    elapsed = stop - start

    print('Time: ', elapsed)
#%%

# start loading
# 0kernel /home/pzou/projects/Power_Signature/results_backup/mybench/p100/mem_trace-combine/gesummv5_kernel.csv
# 0kernel /home/pzou/projects/Power_Signature/results_backup/mybench/p100/mem_trace-combine/gesummv7_kernel.csv
# 0kernel /home/pzou/projects/Power_Signature/results_backup/mybench/p100/mem_trace-combine/heartwall2_kernel.csv
# Done loading
# Time:  70.02522715553641
