In [23]:
import os
import sys
import re
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import openpyxl
import pandas as pd
import math
import itertools
import matplotlib.patches as patches
import seaborn as sns
from decimal import Decimal
from csv import reader
# Regular expression for a numeric literal: an optional sign, then either
# "digits.digits" (the integer part may be empty) or "digits" with an optional
# trailing dot, followed by an optional exponent ("e"/"E", optional sign,
# digits). The pattern is written for re.VERBOSE, so the spaces inside it are
# ignored by the regex engine and only serve readability.
numeric_const_pattern = r'[-+]? (?: (?: \d* \. \d+ ) | (?: \d+ \.? ) )(?: [Ee] [+-]? \d+ ) ?'
# Compiled once at import time; transfer_Data uses rx.findall(...)[0] to take
# the first number found in a cell's text.
rx = re.compile(numeric_const_pattern, re.VERBOSE)

def get_metrics_list(fileName):
    """Read the metric names from a metric definition file.

    Every line that does not start with '#' contributes its first
    comma-separated field, with surrounding whitespace removed. Blank lines
    (and lines whose first field is empty) are skipped so that no empty
    metric name ends up in the result.

    Arguments:
        fileName {string} -- the file name of metric file with absolute path

    Returns:
        data {list} -- the metric list, in file order
    """
    data = []
    # The with-statement closes the file; no explicit close() is needed.
    with open(fileName) as f:
        for line in f:
            if line.startswith('#'):
                continue
            metric = line.split(',')[0].strip()
            # An empty name would later be turned into the bogus file name
            # ".perf.txt", so drop it here.
            if metric:
                data.append(metric)
    return data

def get_app_list(fileName):
    """Read the application identifiers from an application list file.

    Every line that is neither blank nor starting with '#' must hold at
    least two comma-separated fields. The first two fields are stripped of
    whitespace and concatenated (e.g. "brutehash, 1" -> "brutehash1").

    Arguments:
        fileName {string} -- the file name of application file with absolute path

    Returns:
        apps {list} -- the concatenated app identifiers, in file order

    Raises:
        IndexError -- if a non-blank, non-comment line has fewer than two fields
    """
    apps = []
    # The with-statement closes the file; no explicit close() is needed.
    with open(fileName) as f:
        for line in f:
            stripped = line.strip()
            # Skip comments and blank lines (a blank line used to crash on
            # the missing second field).
            if line.startswith('#') or not stripped:
                continue
            words = stripped.split(',')
            apps.append(words[0].strip() + words[1].strip())
    return apps
def get_kernel_names(fileName):
    """Get the list of kernels from a profiler CSV output file.

    The file is scanned for the CSV header line
    "Device","Kernel","Invocations",...,"Avg". Every following line that
    parses into exactly 8 CSV fields contributes its second field (the
    kernel name); lines with any other field count are ignored.

    Arguments:
        fileName {string} -- the file name with absolute path

    Returns:
        data {list} -- the kernel names list, or False when the header line
                       is not present in the file
    """
    header = '"Device","Kernel","Invocations","Metric Name","Metric Description","Min","Max","Avg"'
    with open(fileName) as f:
        lines = [line.rstrip("\r\n") for line in f]

    try:
        startX = lines.index(header)
    except ValueError:
        # Callers test the result with "if not kernel_list", so the falsy
        # False return for "no header" is kept.
        return False

    data = []
    for line in lines[startX + 1:]:
        # Parse with the csv module because kernel signatures may contain
        # commas inside the quoted field.
        fields = next(reader([line], delimiter=','))
        if len(fields) == 8:
            data.append(fields[1])
    return data

def perf_read(app_list, metrics, pathfolder, errorFile, outFile):
    """Collect per-kernel metric values for every application and save them.

    For each application the kernel list is taken from its
    "flop_count_sp_add.perf.txt" file; applications without a usable kernel
    list are printed and skipped. For every metric the file
    "<metric>.perf.txt" is read with perf_read_C. One row per (app, kernel)
    is written to an Excel sheet named "perf", and every problem found is
    written to a CSV file with the columns app, metric, errorType.

    Arguments:
        app_list {list} -- application folder names under pathfolder
        metrics {list} -- metric names; the list is not modified
        pathfolder {string} -- folder that contains one sub-folder per application
        errorFile {string} -- path of the CSV error report to write
        outFile {string} -- path of the Excel file to write
    """
    data = []
    errorData = []
    for app in app_list:
        kernel_list = get_kernel_names(os.path.join(pathfolder, app, "flop_count_sp_add.perf.txt"))
        if not kernel_list:
            print(app)
            continue
        n_kernels = len(kernel_list)
        data_rows = [[app, kernel] for kernel in kernel_list]
        for metric in metrics:
            metric_path = os.path.join(pathfolder, app, "{}.perf.txt".format(metric))
            if not os.path.exists(metric_path):
                print(app, metric)
                errorData.append([app, metric, "Non Exist"])
                # Still fill this metric's cells; skipping them would shift
                # every later metric value into the wrong column.
                values = ["Non Exist"] * n_kernels
            elif os.stat(metric_path).st_size == 0:
                print(app, metric)
                errorData.append([app, metric, "Size 0"])
                values = ["Size 0"] * n_kernels
            else:
                values, errorRecord = perf_read_C(metric_path, n_kernels)
                if errorRecord != "No Error":
                    errorData.append([app, metric, errorRecord])
            for row, value in zip(data_rows, values):
                row.append(value)
        data.extend(data_rows)

    # Build a new list instead of inserting into the caller's metrics list.
    columns = ["app", "kernel"] + list(metrics)
    df = pd.DataFrame(data=data, columns=columns)
    # The context manager saves and closes the workbook; ExcelWriter.save()
    # is no longer available in recent pandas releases.
    with pd.ExcelWriter(outFile, engine='xlsxwriter') as writer:
        df.to_excel(writer, sheet_name="perf", index=False)

    error_df = pd.DataFrame(data=errorData, columns=["app", "metric", "errorType"])
    error_df.to_csv(errorFile, index=False)
  
def perf_read_C(fileName, n):
    """This is a function to extract GPU metrics from output raw file

    The file is classified by its number of lines:
      * exactly 1 line  -- a profiler error message ('Internal Error',
        'non-zero' or 'other');
      * at most 5 lines -- a short error report ('Internal profiling error',
        'No metric' or the fallback 'Error 2');
      * otherwise the CSV header line is searched. Without it the result is
        'No metric' or 'Error 3'. With it, the n lines after the header are
        parsed and the 8th CSV field (column "Avg") of each is returned.
        A line without exactly 8 fields yields 'overflowed' or 'Error 4'.

    In every error case the error label is repeated n times so that the
    result always has one entry per kernel.

    Arguments:
        fileName {string} -- the file name with absolute path
        n {integter} -- the number of kernels for the application

    Returns:
        data {list} -- the extracted metrics (n entries)
        errorRecord {string} -- "No Error" when every parsed line was valid,
            otherwise the label of the most recent problem found ("Unknown"
            only when n is 0 and nothing was parsed)
    """
    header = '"Device","Kernel","Invocations","Metric Name","Metric Description","Min","Max","Avg"'
    errorRecord = "Unknown"
    with open(fileName) as f:
        lines = f.readlines()
    nLine = len(lines)

    if nLine == 1:
        first = lines[0].rstrip("\r\n")
        if first == "======== Error: internal error.":
            errorRecord = 'Internal Error'
        elif first == "======== Error: Application returned non-zero code 1":
            errorRecord = 'non-zero'
        else:
            # os.path also works for a bare file name, where indexing the
            # result of split("/") with [-2] would raise IndexError.
            print(os.path.basename(os.path.dirname(fileName)), os.path.basename(fileName), "other")
            errorRecord = 'other'
        return [errorRecord] * n, errorRecord

    if nLine <= 5:
        errorRecord = 'Error 2'
        for line in lines:
            if "Internal profiling error" in line:
                errorRecord = "Internal profiling error"
                break
            if "cannot be found" in line or "No events/metrics were profiled" in line:
                errorRecord = "No metric"
                break
        return [errorRecord] * n, errorRecord

    stripped = [line.rstrip("\r\n") for line in lines]
    if header not in stripped:
        errorRecord = 'Error 3'
        if any("No events/metrics were profiled" in line for line in lines):
            errorRecord = "No metric"
        return [errorRecord] * n, errorRecord

    startX = stripped.index(header)
    if nLine < startX + 1 + n:
        errorRecord = "No enough kernels"
        return [errorRecord] * n, errorRecord

    data = []
    row_error = None
    for line in stripped[startX + 1:startX + 1 + n]:
        fields = next(reader([line], delimiter=','))
        if len(fields) == 8:
            data.append(fields[7])
        elif "overflowed" in line:
            data.append('overflowed')
            row_error = "overflowed"
        else:
            data.append('Error 4')
            row_error = "Error 4"
    if data:
        # A valid later line must not hide a problem found on an earlier
        # line, so "No Error" is reported only when no line failed.
        errorRecord = row_error if row_error is not None else "No Error"
    return data, errorRecord


def process_Data(category, arch, root="/home/pzou/projects/Power_Signature"):
    """Read all profiler results of one category/architecture and save them.

    The input locations are derived from root:
      * results:      <root>/results/<category>/<arch>/perf-combine
      * applications: <root>/Scripts/applications_<category>.csv
      * metrics:      <root>/Scripts/metrics.csv
    The outputs "perf-<category>-<arch>.xlsx" and
    "perfError-<category>-<arch>.csv" are written relative to the current
    working directory.

    Arguments:
        category {string} -- benchmark category, used in folder and file names
        arch {string} -- architecture name, used in folder and file names
        root {string} -- project root folder; defaults to the original
                         hard-coded location so existing calls are unchanged
    """
    pathfolder = "{}/results/{}/{}/perf-combine".format(root, category, arch)
    app_list = get_app_list("{}/Scripts/applications_{}.csv".format(root, category))
    metrics = get_metrics_list("{}/Scripts/metrics.csv".format(root))
    outFile = "perf-{}-{}.xlsx".format(category, arch)
    errorFile = "perfError-{}-{}.csv".format(category, arch)
    print("start")
    perf_read(app_list, metrics, pathfolder, errorFile, outFile)
    print("done")
    
def _perf_cell_to_number(cell):
    """Convert one cell's text to a float, normalising byte units to MB.

    The first number found in the text is used. "GB" values are multiplied
    by 1000, "KB" values divided by 1000 and plain "B" values divided by
    1000000; "MB" values and values without a byte unit are kept as they
    are. A cell that contains no number yields NaN.
    """
    text = str(cell)
    found = rx.findall(text)
    if not found:
        return float("nan")
    x = float(found[0])
    # "MB" must be tested before the bare "B" check, which would match too.
    if "MB" in text:
        pass
    elif "GB" in text:
        x = x * 1000.0
    elif "KB" in text:
        x = x / 1000.0
    elif "B" in text:
        x = x / 1000000.0
    return x


def transfer_Data(fileName, arch):
    """Clean an Excel sheet of raw metric values and save it as numeric CSV.

    Steps:
      1. drop the metric columns listed as not usable for the architecture;
      2. drop every row in which any cell holds an error label;
      3. convert every remaining cell (except the "app" and "kernel"
         columns) to a float, with byte units normalised to MB;
      4. write the result next to the input, with '.xlsx' in the file name
         replaced by 'fine.csv'.

    Arguments:
        fileName {string} -- path of the Excel file to read
        arch {string} -- architecture name ("p100" or "k40", any letter case)
    """
    # Every failure label that perf_read / perf_read_C use. Labels such as
    # "Error 2" contain a digit and would otherwise be parsed as a number.
    errorList = ["Error 4", "Error 5", "Internal profiling error", "No metric",
                 "Internal Error", "non-zero", "other", "Error 2", "Error 3",
                 "overflowed", "No enough kernels", "Non Exist", "Size 0"]
    # Compare case-insensitively: the script entry point passes "k40".
    arch_key = arch.lower()
    nmList = []
    if arch_key == "p100":
        nmList = ["tex_utilization"]
    elif arch_key == "k40":
        nmList = ["branch_efficiency", "global_hit_rate", "local_hit_rate", "l2_tex_write_hit_rate",
                  "l2_tex_write_throughput", "l2_tex_write_transactions", "flop_count_hp",
                  "flop_count_hp_add", "flop_count_hp_mul", "flop_count_hp_fma", "inst_fp_16",
                  "shared_utilization", "special_fu_utilization", "half_precision_fu_utilization",
                  "single_precision_fu_utilization", "double_precision_fu_utilization",
                  "flop_hp_efficiency"]

    data = pd.read_excel(fileName)
    # errors="ignore": a listed column that is absent from the sheet is
    # simply skipped instead of raising KeyError.
    data = data.drop(columns=nmList, errors="ignore")
    has_error = data.isin(errorList).any(axis=1)
    data = data[~has_error].copy()
    for col in data.columns:
        if col in ("app", "kernel"):
            continue
        data[col] = [_perf_cell_to_number(cell) for cell in data[col]]

    outFile = fileName.replace('.xlsx', 'fine.csv')
    data.to_csv(outFile, index=False)
 

if __name__ == "__main__":
    # Script entry point: collect the profiler results of the "risky"
    # category on the "k40" architecture.
    print("start")
    category, arch = "risky", "k40"
    process_Data(category, arch)
    # The follow-up clean-up step is currently disabled; it would be run as
    # transfer_Data('perf-mybench-%s.xlsx' % arch, arch).
    print("done")


start
start
brutehash1 gst_requested_throughput.perf.txt other
brutehash1 l2_write_throughput.perf.txt other
brutehash1 atomic_transactions_per_request.perf.txt other
brutehash2 shared_store_transactions.perf.txt other
brutehash2 local_hit_rate.perf.txt other
brutehash2 gst_requested_throughput.perf.txt other
brutehash2 l2_write_throughput.perf.txt other
brutehash2 stall_constant_memory_dependency.perf.txt other
brutehash2 atomic_transactions_per_request.perf.txt other
brutehash2 half_precision_fu_utilization.perf.txt other
brutehash3 shared_store_transactions.perf.txt other
brutehash3 gst_requested_throughput.perf.txt other
brutehash3 l2_write_throughput.perf.txt other
brutehash3 atomic_transactions_per_request.perf.txt other
brutehash4 local_hit_rate.perf.txt other
brutehash4 gst_requested_throughput.perf.txt other
brutehash4 l2_write_throughput.perf.txt other
brutehash4 atomic_transactions_per_request.perf.txt other
brutehash4 half_precision_fu_utilization.perf.txt other
brutehash5 