In [15]:
import os
import sys
import re
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import openpyxl
import pandas as pd
import math
import itertools
import matplotlib.patches as patches
import seaborn as sns
from decimal import Decimal
from csv import reader
import csv
from tsfresh import extract_features
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute
import glob

def get_app_list(fileName):
    """Read the application list file and build one identifier per entry.

    Each usable line has the form ``<app>,<number>[,...]``; the identifier is
    the app name and the number concatenated without a separator
    (e.g. ``bfs, 3`` -> ``"bfs3"``). Blank lines and lines whose first
    non-blank character is ``#`` are ignored.

    Arguments:
        fileName {string} -- the file name of application file with absolute path

    Returns:
        apps {list} -- the app identifiers, in file order

    Raises:
        IndexError -- if a non-blank, non-comment line has no second
                      comma-separated field
    """
    apps = []
    # The context manager closes the file; no explicit close() is needed.
    with open(fileName) as f:
        for line in f:
            entry = line.strip()
            # Skip blank lines (e.g. a trailing newline at the end of the
            # file) as well as comments; a blank line used to crash on
            # words[1].
            if not entry or entry.startswith('#'):
                continue
            words = entry.split(',')
            apps.append(words[0].strip() + words[1].strip())
    return apps

def trace_read_C(fileName, kernel_outName, transfer_outName):
    """Split a CSV GPU trace into a kernel trace and a transfer trace.

    The input (presumably an nvprof CSV export -- confirm) is scanned for the
    header row, i.e. the first row containing the cell ``Start``. Every later
    row with more than 10 cells whose ``Start`` cell does not contain the
    letter ``s`` (this drops a repeated header and the units row) is
    classified by its ``Name`` cell:

    * contains ``HtoD`` / ``DtoH`` / ``DtoD`` -- a memory transfer, written to
      the transfer CSV with direction code 0 / 1 / 2;
    * contains ``CUDA memset``, ``CUDA memcpy`` or ``Unified Memory`` --
      ignored;
    * anything else with a non-empty ``Duration`` -- a kernel launch, written
      to the kernel CSV with per-dimension thread counts (grid * block).

    Arguments:
        fileName {string} -- path of the input trace
        kernel_outName {string} -- path of the kernel CSV to write
        transfer_outName {string} -- path of the transfer CSV to write

    Returns:
        {list} -- ``[time_K, time_H_D, time_D_H, time_D_D]``, the summed
                  durations per category, or ``[-1, -1, -1, -1]`` when the
                  header has no ``Throughput`` column (nothing is written in
                  that case) or when the total time is 0.

    Raises:
        ValueError -- if the header lacks one of the other required columns,
                      or a numeric cell cannot be parsed as float
    """
    start = duration = name = throughput = 0
    grid_x = grid_y = grid_z = 0
    block_x = block_y = block_z = 0

    with open(fileName) as f:
        csv_reader = csv.reader(f, delimiter=',')
        for header in csv_reader:
            if 'Start' in header:
                if 'Throughput' not in header:
                    return [-1, -1, -1, -1]
                start = header.index('Start')
                duration = header.index('Duration')
                grid_x = header.index('Grid X')
                grid_y = header.index('Grid Y')
                grid_z = header.index('Grid Z')
                block_x = header.index('Block X')
                block_y = header.index('Block Y')
                block_z = header.index('Block Z')
                name = header.index('Name')
                throughput = header.index('Throughput')
                break
        # Everything after the header; empty if no header row was found.
        rows = list(csv_reader)

    time_K = 0.0
    time_H_D = 0.0
    time_D_H = 0.0
    time_D_D = 0.0
    transfer_trace = []
    kernel_trace = []
    for r in rows:
        if len(r) <= 10 or r[start] == 'Start' or 's' in r[start]:
            continue
        label = r[name]
        if 'HtoD' in label:
            # NOTE(review): HtoD time is counted even when the throughput
            # cell is empty, whereas DtoH/DtoD skip such rows entirely.
            # Kept as in the original -- confirm which is intended.
            time_H_D += float(r[duration])
            if r[throughput] == '':
                continue
            transfer_trace.append([float(r[start]), float(r[duration]),
                                   0, float(r[throughput])])
        elif 'DtoH' in label:
            if r[throughput] == '':
                continue
            time_D_H += float(r[duration])
            transfer_trace.append([float(r[start]), float(r[duration]),
                                   1, float(r[throughput])])
        elif 'DtoD' in label:
            if r[throughput] == '':
                continue
            # Fix: device-to-device time used to be added to time_D_H,
            # leaving time_D_D permanently at 0.
            time_D_D += float(r[duration])
            transfer_trace.append([float(r[start]), float(r[duration]),
                                   2, float(r[throughput])])
        elif ('CUDA memset' in label or 'CUDA memcpy' in label
              or 'Unified Memory' in label):
            continue
        elif r[duration]:
            time_K += float(r[duration])
            kernel_trace.append([float(r[start]), float(r[duration]),
                                 float(r[grid_x]) * float(r[block_x]),
                                 float(r[grid_y]) * float(r[block_y]),
                                 float(r[grid_z]) * float(r[block_z])])

    with open(transfer_outName, "w", newline='') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',')
        csvwriter.writerow(["start", "duration", "direction", "throughput"])
        csvwriter.writerows(transfer_trace)

    with open(kernel_outName, "w", newline='') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',')
        csvwriter.writerow(["start", "duration", "threads_x", "threads_y", "threads_z"])
        csvwriter.writerows(kernel_trace)

    if time_K + time_D_D + time_D_H + time_H_D == 0:
        return [-1, -1, -1, -1]
    return [time_K, time_H_D, time_D_H, time_D_D]

def trace_read(outFile, app_list, pathfolder):
    """Run trace_read_C on every app whose trace file exists in pathfolder.

    For each app the input is ``<pathfolder>/<app>.mem_output`` and the
    outputs are ``<app>_kernel.csv`` and ``<app>_transfer.csv`` in the same
    folder. The app name is printed as it is visited; if its trace file is
    missing, the missing path is printed and the app is skipped.

    Arguments:
        outFile {string} -- currently unused (the summary export is disabled)
        app_list {list} -- app identifiers to process
        pathfolder {string} -- folder holding the traces and receiving outputs

    Returns:
        None -- the per-app timing summary is assembled but not returned
    """
    summary_rows = []
    for app in app_list:
        print(app)
        trace_path = os.path.join(pathfolder, "{}.mem_output".format(app))
        if os.path.exists(trace_path):
            kernel_csv = os.path.join(pathfolder, "{}_kernel.csv".format(app))
            transfer_csv = os.path.join(pathfolder, "{}_transfer.csv".format(app))
            timings = trace_read_C(trace_path, kernel_csv, transfer_csv)
            summary_rows.append([app] + timings)
        else:
            print(trace_path)
    # Summary table of per-app times; writing it to outFile is disabled.
    df = pd.DataFrame(data=summary_rows,
                      columns=["app", "kernel", "H_D", "D_H", "D_D"])
    #df.to_csv(outFile, index=False )

def transfer_Data(fileName, outFile):
    """Copy an Excel sheet to a new workbook, dropping rows that contain -1.

    A row is removed if any of its columns equals -1. The surviving rows are
    written to sheet ``mem_trace`` of ``outFile``.

    Arguments:
        fileName {string} -- path of the Excel file to read
        outFile {string} -- path of the Excel file to write
    """
    data = pd.read_excel(fileName)
    for col in data.columns:
        data = data[data[col] != -1]
    # ExcelWriter.save() no longer exists in pandas >= 2.0; the context
    # manager saves and closes the workbook, also when to_excel raises.
    with pd.ExcelWriter(outFile, engine='xlsxwriter') as writer:
        data.to_excel(writer, sheet_name="mem_trace", index=False)
def combine_data(pathfolder, app_list, outFile, chunk_size=200):
    """Merge per-app CSV files into one Excel workbook, chunk_size apps per sheet.

    Each ``<pathfolder>/<app>.csv`` is loaded, tagged with an ``app`` column
    (the app name) and an ``ID`` column (the app's 0-based position in
    ``app_list``), and concatenated with the other apps of its chunk. Sheets
    are named ``"<first>-<last>"`` using 1-based app positions.

    Arguments:
        pathfolder {string} -- folder containing the per-app CSV files
        app_list {list} -- app identifiers, in the order they are numbered
        outFile {string} -- path of the Excel workbook to write

    Keyword Arguments:
        chunk_size {int} -- number of apps per sheet (default: {200})
    """
    total = len(app_list)
    # ExcelWriter.save() no longer exists in pandas >= 2.0; the context
    # manager saves and closes the workbook on exit.
    with pd.ExcelWriter(outFile, engine='xlsxwriter') as writer:
        pending = []
        first = 1  # 1-based position of the first app in the current chunk
        for idx, app in enumerate(app_list):
            frame = pd.read_csv(os.path.join(pathfolder, "{}.csv".format(app)))
            frame["app"] = app
            frame["ID"] = idx
            pending.append(frame)

            count = idx + 1
            if count % chunk_size == 0 or count == total:
                # Fix: the trailing partial chunk used to be labelled
                # "<count-199>-<count>", which overlaps the previous sheet's
                # range; it now starts where the previous chunk ended.
                pd.concat(pending).to_excel(
                    writer, sheet_name="{}-{}".format(first, count), index=False)
                pending = []
                first = count + 1

def extractFeature(dataName, outName):
    """Extract tsfresh features from every sheet of an Excel workbook.

    All sheets of ``dataName`` are concatenated, the ``app`` column is
    dropped, and ``extract_features`` is run with ``ID`` as the series id and
    ``start`` as the sort column. The result is written to ``outName`` as CSV
    without the index.

    Arguments:
        dataName {string} -- path of the Excel workbook to read
        outName {string} -- path of the CSV file to write
    """
    workbook = pd.ExcelFile(dataName)
    frames = [workbook.parse(sheet_name=sheet) for sheet in workbook.sheet_names]
    merged = pd.concat(frames)
    series = merged.drop(columns=["app"])
    features = extract_features(series, column_id="ID", column_sort="start")
    features.to_csv(outName, index=False)


def process_Data(pathfolder, app_list):
    """Run trace_read over app_list, printing "start" and "end" around it.

    Reads the module-level names ``category`` and ``arch`` to build the
    summary file name, so both must be defined before this is called.

    Arguments:
        pathfolder {string} -- folder holding the trace files
        app_list {list} -- app identifiers to process
    """
    summary_name = "memRatio-{}-{}.csv".format(category, arch)
    print("start")
    trace_read(summary_name, app_list, pathfolder)
    print("end")

if __name__ == "__main__":
    category = "risky"
    # Every (category, arch) pair is processed in order; only "v100" is
    # currently enabled ("k40" was previously listed as an alternative).
    for category, arch in itertools.product(["mybench", "risky"], ["v100"]):
        # Traces live under results_backup/<category>/<arch>/mem_trace-combine.
        pathfolder = r"/home/pzou/projects/Power_Signature/results_backup/%s/%s/mem_trace-combine"%(category, arch)
        app_list = get_app_list("/home/pzou/projects/Power_Signature/Scripts/applications-mem_%s.csv"%(category))

        memFile = "memRatio-{}-{}.csv".format(category, arch)
        print("start")
        trace_read(memFile, app_list, pathfolder)

        # Target of the (currently disabled) workbook merge step.
        combineFile = "{}/mem-{}-{}.xlsx".format(arch,category, arch)
        #combine_data(pathfolder, app_list, combineFile)
        print("end")

start
alignedTypes1
alignedTypes2
alignedTypes3
alignedTypes4
alignedTypes5
alignedTypes6
alignedTypes7
alignedTypes8
alignedTypes9
alignedTypes10
atax1
atax2
atax3
atax4
atax5
atax6
atax7
atax8
atax9
atax10
b+tree1
b+tree2
b+tree3
b+tree4
b+tree5
b+tree6
b+tree7
b+tree8
b+tree9
b+tree10
b+tree11
backprop1
backprop2
backprop3
backprop4
backprop5
backprop6
backprop7
backprop8
backprop9
backprop10
bfs1
bfs2
bfs3
bfs4
bfs5
bfs6
bfs7
bfs8
bfs9
bfs10
bicg1
bicg2
bicg3
bicg4
bicg5
bicg6
bicg7
bicg8
bicg9
bicg10
bicg11
binomialOptions1
binomialOptions2
binomialOptions3
binomialOptions4
binomialOptions5
binomialOptions6
binomialOptions7
binomialOptions8
binomialOptions9
binomialOptions10
BlackScholes1
BlackScholes2
BlackScholes3
BlackScholes4
BlackScholes5
BlackScholes6
BlackScholes7
BlackScholes8
BlackScholes9
BlackScholes10
BlackScholes11
c++11_cuda1
c++11_cuda2
c++11_cuda4
c++11_cuda5
c++11_cuda6
c++11_cuda7
c++11_cuda8
c++11_cuda9
c++11_cuda10
c++11_cuda11
cdpAdvancedQuicksort1
cdpAdvanced

blake2b10
brutehash1
brutehash2
brutehash3
brutehash4
brutehash5
brutehash6
brutehash7
brutehash8
brutehash9
brutehash10
cuda-hash1
cuda-hash2
cuda-hash3
cuda-hash4
cuda-hash5
cuda-hash6
cuda-hash7
cuda-hash8
cuda-hash9
cuda-hash10
cuda-hash11
cudaSha2561
cudaSha2562
cudaSha2563
cudaSha2564
cudaSha2565
cudaSha2566
cudaSha2567
cudaSha2568
cudaSha2569
cudaSha25610
cudaSha25611
des1
des2
des3
des4
des5
des6
des7
des8
des9
des10
hashiru1
hashiru2
hashiru3
hashiru4
hashiru5
hashiru6
hashiru7
hashiru8
hashiru9
hashiru10
hashrecoverer1
hashrecoverer2
hashrecoverer3
hashrecoverer4
hashrecoverer5
hashrecoverer6
hashrecoverer7
hashrecoverer8
hashrecoverer9
hashsums1
hashsums2
hashsums3
hashsums4
hashsums5
hashsums6
hashsums7
hashsums8
hashsums9
hashsums10
image-encryption1
image-encryption2
image-encryption4
image-encryption5
image-encryption6
image-encryption7
image-encryption8
image-encryption9
image-encryption10
mario1
mario2
mario3
mario4
mario6
mario7
mario8
mario9
mario10
md5-bruteforcer1
