In [1]:
from __future__ import print_function
from optparse import OptionParser
import re
import os
import subprocess
import sys
import math
import glob
import yaml
import time
from pprint import pprint
from pathlib import Path

In [14]:
this_directory = Path("/home/roman/dev/gpgpusims/notebooks/gpgpusim")
this_directory = Path("/Users/roman/dev/gpgpusims/notebooks/gpgpusim")

In [15]:
def parse_app_definition_yaml( def_yml, apps ):
    benchmark_yaml = yaml.load(open(def_yml), Loader=yaml.FullLoader)
    for suite in benchmark_yaml:
        apps[suite] = []
        for exe in benchmark_yaml[suite]['execs']:
            exe_name = list(exe.keys())[0]
            args_list = list(exe.values())[0]
            count = 0
            for runparms in args_list:
                args = runparms["args"]
                if "accel-sim-mem" not in runparms:
                    runparms["accel-sim-mem"] = "4G"
                apps[suite + ":" + exe_name + ":" + str(count) ] = []
                apps[suite + ":" + exe_name + ":" + str(count) ].append( ( benchmark_yaml[suite]['exec_dir'],
                                    benchmark_yaml[suite]['data_dirs'],
                                    exe_name, [args]) )
                count += 1
            apps[suite].append(( benchmark_yaml[suite]['exec_dir'],
                                 benchmark_yaml[suite]['data_dirs'],
                                 exe_name, args_list ))
            apps[suite + ":" + exe_name] = []
            apps[suite + ":" + exe_name].append( ( benchmark_yaml[suite]['exec_dir'],
                                 benchmark_yaml[suite]['data_dirs'],
                                 exe_name, args_list ) )

In [16]:
def parse_config_definition_yaml( def_yml, defined_baseconfigs, defined_xtracfgs ):
    configs_yaml = yaml.load(open( def_yml ), Loader=yaml.FullLoader)
    for config in configs_yaml:
        if 'base_file' in configs_yaml[config]:
            defined_baseconfigs[config] = os.path.expandvars(configs_yaml[config]['base_file'])
        elif 'extra_params' in configs_yaml[config]:
            defined_xtracfgs[config] = configs_yaml[config]['extra_params']

In [17]:
def load_defined_yamls():
    defined_apps = {}
    defined_baseconfigs = {}
    defined_xtracfgs = {}
    
    define_yamls = glob.glob(os.path.join(this_directory, 'apps/define-*.yml'))
    for def_yaml in define_yamls:
        parse_app_definition_yaml( os.path.join(this_directory, 'apps', def_yaml), defined_apps)
    define_yamls = glob.glob(os.path.join(this_directory, 'configs/define-*.yml'))
    for def_yaml in define_yamls:
        parse_config_definition_yaml( os.path.join(this_directory, 'configs', def_yaml), defined_baseconfigs, defined_xtracfgs )
    
    return defined_apps, defined_baseconfigs, defined_xtracfgs

In [18]:
(defined_apps, defined_baseconfigs, defined_xtracfgs) = load_defined_yamls()
# pprint(defined_apps["rodinia_2.0-ft"])
# pprint(defined_baseconfigs)
# pprint(defined_xtracfgs)

In [19]:
class PathMissing(Exception):
    pass

In [20]:
def get_cuda_version(this_directory):
    # Get CUDA version
    return "10.1"
    # we patch this to match the CUDA version we used in the docker container
    nvcc_out_filename = os.path.join( this_directory, "nvcc_out.{0}.txt".format(os.getpid()) )
    nvcc_out_file = open(nvcc_out_filename, 'w+')
    subprocess.call(["nvcc", "--version"],\
                   stdout=nvcc_out_file)
    nvcc_out_file.seek(0)
    cuda_version = re.sub(r".*release (\d+\.\d+).*", r"\1", nvcc_out_file.read().strip().replace("\n"," "))
    nvcc_out_file.close()
    os.remove(nvcc_out_filename)
    os.environ['CUDA_VERSION'] = cuda_version
    return cuda_version

In [21]:
def dir_option_test(name, default, this_directory):
    name = os.path.expandvars(name)
    if name == "":
        name = os.path.join(this_directory, default)
    if not os.path.isdir(name):
        name = os.path.join(os.getcwd(), name)
        if not os.path.isdir(name):
            raise PathMissing("Error - directory test fails for {0}".format(name))
    return name

In [22]:
# This function exists so that this file can accept both absolute and relative paths
# If no name is provided it sets the default
# Either way it does a test if the absolute path exists and if not, tries a relative path
def file_option_test(name, default, this_directory):
    name = os.path.expandvars(name)
    if name == "":
        if default == "":
            return ""
        else:
            name = os.path.join(this_directory, default)
    try:
        with open(name): pass
    except IOError:
        name = os.path.join(os.getcwd(), name)
        try:
            with open(name): pass
        except IOError:
            raise PathMissing("Error - cannot open file {0}".format(name))
    return name

In [24]:
cuda_version = get_cuda_version( this_directory )

class GetStatsOptions:
    run_dir = os.path.join(this_directory, "../../analyze/sim_run_%s/" % cuda_version)
    stats_yml = ""
    configs_list = "QV100-PTX"
    benchmark_list = "rodinia_2.0-ft"
    per_kernel = True
    kernel_instance = False
    configs_as_rows = True
    do_averages = False

get_stats_options = GetStatsOptions()
assert Path(get_stats_options.run_dir).is_dir()

In [25]:
get_stats_options.run_dir = dir_option_test( get_stats_options.run_dir, os.path.join(this_directory, "../../sim_run_%s/" % cuda_version),
                                          this_directory )
if not os.path.isdir(get_stats_options.run_dir):
    exit(get_stats_options.run_dir + " does not exist - specify the run directory where the benchmark/config dirs exist")

get_stats_options.stats_yml = file_option_test( get_stats_options.stats_yml, os.path.join( this_directory, "stats", "example_stats.yml" ),
                                            this_directory )
stats_to_pull = {}
stats_yaml = yaml.load(open(get_stats_options.stats_yml), Loader=yaml.FullLoader )
for stat in stats_yaml['collect_aggregate']:
    stats_to_pull[stat] = re.compile(stat), "agg"

for stat in stats_yaml['collect_abs']:
    stats_to_pull[stat] = re.compile(stat), "abs"

for stat in stats_yaml['collect_rates']:
    stats_to_pull[stat] = re.compile(stat), "rate"

In [26]:
# Test to see if the passed config adheres to any defined configs and add it to the configrations to run/collect.
def get_config(name, defined_baseconfigs, defined_xtracfgs):
    tokens = name.split('-')
    if tokens[0] not in defined_baseconfigs:
        print("Could not fined {0} in defined basenames {1}".format(tokens[0], defined_baseconfigs))
        return None
    else:
        config = (name, "", defined_baseconfigs[tokens[0]])
    for token in tokens[1:]:
        if token not in defined_xtracfgs:
            print("Could not find {0} in defined xtraconfigs {1}".format(token, defined_xtracfgs))
            return None
        else:
            oldName, oldXtra, oldBasename = config
            config = \
                (oldName, oldXtra + "\n#{0}\n{1}\n".format(token, defined_xtracfgs[token]), oldBasename)
    return config

def gen_apps_from_suite_list( app_list ):
    benchmarks = []
    for app in app_list:
        benchmarks += defined_apps[app]
    return benchmarks

def gen_configs_from_list( cfg_list ):
    configs = []
    for cfg in cfg_list:
        configs.append(get_config(cfg, defined_baseconfigs, defined_xtracfgs))
    return configs

In [27]:
def get_argfoldername( args ):
    if args == "" or args == None:
        return "NO_ARGS"
    else:
        foldername = re.sub(r"[^a-z^A-Z^0-9]", "_", str(args).strip())
        # For every long arg lists - create a hash of the input args
        if len(str(args)) > 256:
            foldername = "hashed_args_" + hashlib.md5(args).hexdigest()
        return foldername

In [28]:
apps_and_args = []
configs = []

if get_stats_options.configs_list != "" and get_stats_options.benchmark_list != "":
    for app in gen_apps_from_suite_list(get_stats_options.benchmark_list.split(",")):
        a,b,exe_name,args_list = app
        for argpair in args_list:
            args = argpair["args"]
            apps_and_args.append( os.path.join(exe_name, get_argfoldername(args) ) )
    for config, params, gpuconf_file in gen_configs_from_list( get_stats_options.configs_list.split(",") ):
        configs.append( config )

# pprint(apps_and_args)

In [29]:
all_named_kernels = {}
specific_jobIds = {}
stat_map = {}
files_parsed = 0
bytes_parsed = 0

for idx, app_and_args in enumerate(apps_and_args):
    all_named_kernels[app_and_args] = []
    for config in configs:
        # now get the right output file
        output_dir = os.path.join(get_stats_options.run_dir, app_and_args, config)
        if not os.path.isdir( output_dir ):
            print("WARNING the outputdir " + output_dir + " does not exist", file=sys.stderr)
            continue
            
        if config + app_and_args in specific_jobIds:
            jobId,jobname = specific_jobIds[ config + app_and_args ]
            torque_submname = re.sub(r".*\.([^\s]*-commit-.*)", r"\1", jobname)
            outfile = os.path.join(output_dir, exes_and_args[idx].replace("/", "-") + "." +\
               torque_submname + "." + "o" + jobId)
        else:
            all_outfiles = [os.path.join(output_dir, f) \
                           for f in os.listdir(output_dir) if(re.match(r'.*\.o[0-9]+',f))]
            if len(all_outfiles) != 0:
                outfile = max(all_outfiles, key=os.path.getmtime)
            else:
                continue

        stat_found = set()

        if not os.path.isfile( outfile ):
            print("WARNING - " + outfile + " does not exist", file=sys.stderr)
            continue
            
        # Do a quick 100-line pass to get the GPGPU-Sim Version number
        MAX_LINES = 100
        count = 0
        f = open(outfile)
        for line in f:
            count += 1
            if count >= MAX_LINES:
                break
            gpgpu_build_match = re.match(".*GPGPU-Sim.*\[build\s+(.*)\].*", line)
            if gpgpu_build_match:
                stat_map["all_kernels" + app_and_args + config + "GPGPU-Sim-build"] = gpgpu_build_match.group(1)
                break
            accelsim_build_match = re.match("Accel-Sim.*\[build\s+(.*)\].*", line)
            if accelsim_build_match:
                stat_map["all_kernels" + app_and_args + config + "Accel-Sim-build"] = accelsim_build_match.group(1)
        f.close()
        
        # Do a quick 10000-line reverse pass to make sure the simualtion thread finished
        SIM_EXIT_STRING = "GPGPU-Sim: \*\*\* exit detected \*\*\*"
        exit_success = False
        MAX_LINES = 10000
        BYTES_TO_READ = int(250 * 1024 * 1024)
        count = 0
        f = open(outfile)
        fsize = int(os.stat(outfile).st_size)
        if fsize > BYTES_TO_READ:
            f.seek(0, os.SEEK_END)
            f.seek(f.tell() - BYTES_TO_READ, os.SEEK_SET)
        lines = f.readlines()
        for line in reversed(lines):
            count += 1
            if count >= MAX_LINES:
                break
            exit_match = re.match(SIM_EXIT_STRING, line)
            if exit_match:
                exit_success = True
                break
        del lines
        f.close()

        if not exit_success:
            print("WARNING - Detected that {0} does not contain a terminating string from GPGPU-Sim. The output is potentially invalid".format(outfile),
                file=sys.stderr)
            if not options.ignore_failures:
                continue
        
        if not get_stats_options.per_kernel:
            if len(all_named_kernels[app_and_args]) == 0:
                all_named_kernels[app_and_args].append("final_kernel")
            BYTES_TO_READ = int(250 * 1024 * 1024)
            count = 0
            f = open(outfile)
            fsize = int(os.stat(outfile).st_size)
            files_parsed += 1
            if fsize > BYTES_TO_READ:
                f.seek(0, os.SEEK_END)
                f.seek(f.tell() - BYTES_TO_READ, os.SEEK_SET)
                bytes_parsed += BYTES_TO_READ
            else:
                bytes_parsed += fsize
            lines = f.readlines()
            for line in reversed(lines):
                # pull out some stats
                for stat_name, tup in stats_to_pull.items():
                    token, statType = tup
                    if stat_name in stat_found:
                        continue
                    existance_test = token.search( line.rstrip() )
                    if existance_test != None:
                        stat_found.add(stat_name)
                        number = existance_test.group(1).strip()
                        stat_map["final_kernel" + app_and_args + config + stat_name] = number
                if len(stat_found) == len(stats_to_pull):
                    break
            del lines
            f.close()
        else:
            current_kernel =""
            last_kernel = ""
            raw_last = {}
            running_kcount = {}
            files_parsed += 1
            bytes_parsed += os.stat(outfile).st_size
            f = open(outfile)
            #print("Parsing File {0}. Size: {1}".format(outfile, millify(os.stat(outfile).st_size)))
            for line in f:
                # If we ended simulation due to too many insn - ignore the last kernel launch, as it is no complete.
                # Note: This only appies if we are doing kernel-by-kernel stats
                last_kernel_break = re.match("GPGPU-Sim: \*\* break due to reaching the maximum cycles \(or instructions\) \*\*", line)
                if last_kernel_break:
                    print("NOTE::::: Found Max Insn reached in {0} - ignoring last kernel.".format(outfile), file=sys.stderr)
                    for stat_name in stats_to_pull.keys():
                        if current_kernel + app_and_args + config + stat_name in stat_map:
                            del stat_map[current_kernel + app_and_args + config + stat_name]

                kernel_match = re.match("kernel_name\s+=\s+(.*)", line);
                if kernel_match:
                    last_kernel = current_kernel
                    current_kernel = kernel_match.group(1).strip()

                    if get_stats_options.kernel_instance:
                        if current_kernel not in running_kcount:
                            running_kcount[current_kernel] = 0
                        else:
                            running_kcount[current_kernel] += 1
                        current_kernel += "--" + str(running_kcount[current_kernel])

                    if current_kernel not in all_named_kernels[app_and_args]:
                        all_named_kernels[app_and_args].append(current_kernel)

                    if current_kernel + app_and_args + config + "k-count" in stat_map:
                        stat_map[current_kernel + app_and_args + config + "k-count"] += 1
                    else:
                        stat_map[current_kernel + app_and_args + config + "k-count"] = 1
                    continue

                for stat_name, tup in stats_to_pull.items():
                    token, statType = tup
                    existance_test = token.search( line.rstrip() )
                    if existance_test != None:
                        stat_found.add(stat_name)
                        number = existance_test.group(1).strip()
                        if statType != "agg":
                            stat_map[current_kernel + app_and_args + config + stat_name] = number
                        elif current_kernel + app_and_args + config + stat_name in stat_map:
                            if stat_name in raw_last:
                                stat_last_kernel = raw_last[stat_name]
                            else:
                                stat_last_kernel = 0.0
                            raw_last[ stat_name ] = float(number)
                            stat_map[current_kernel + app_and_args + config + stat_name] += ( float(number) - stat_last_kernel )
                        else:
                            if last_kernel + app_and_args + config + stat_name in stat_map:
                                stat_last_kernel = raw_last[stat_name]
                            else:
                                stat_last_kernel = 0.0
                            raw_last[stat_name] = float(number)
                            stat_map[current_kernel + app_and_args + config + stat_name] = ( float(number) - stat_last_kernel )

In [30]:
# pprint(list(stat_map.keys()))
# pprint(stat_map)
bfs = {k: v for k, v in stat_map.items() if "bfs-rodinia" in k}
# pprint(bfs)
if get_stats_options.per_kernel:
    pprint(set([k.split("/")[0].rstrip("_bfs-rodinia-2.0-ft") for k in bfs.keys()]))
    pprint({
        k.split("/")[0].rstrip("bfs-rodinia-2.0-ft") + k.split("-")[-1]: v
        for k, v in bfs.items()
    })
else:
    pprint({
        k.lstrip("final_kernelbfs-rodinia-2.0-ft/__data_graph4096_txt___data_graph4096_result_txtQV100-PTX"): v
        for k, v in bfs.items()
    })

{'_Z7Kernel2PbS_S_S', '_Z6KernelP4NodePiPbS2_S2_S1', 'all_kernel'}
{'_Z6KernelP4NodePiPbS2_S2_S1_9]+) sec\\).*': 63.0,
 '_Z6KernelP4NodePiPbS2_S2_S1_PTXL2_BW\\s*=\\s*(.*)+GB\\/Sec': '',
 '_Z6KernelP4NodePiPbS2_S2_S1_PTX\\s+L2_cache_stats_breakdown\\[GLOBAL_ACC_R\\]\\[HIT\\]\\s*=\\s*(.*)': 13735.0,
 '_Z6KernelP4NodePiPbS2_S2_S1_PTX\\s+L2_cache_stats_breakdown\\[GLOBAL_ACC_R\\]\\[TOTAL_ACCESS\\]\\s*=\\s*(.*)': 13735.0,
 '_Z6KernelP4NodePiPbS2_S2_S1_PTX\\s+L2_cache_stats_breakdown\\[GLOBAL_ACC_W\\]\\[HIT\\]\\s*=\\s*(.*)': 15818.0,
 '_Z6KernelP4NodePiPbS2_S2_S1_PTX\\s+L2_cache_stats_breakdown\\[GLOBAL_ACC_W\\]\\[TOTAL_ACCESS\\]\\s*=\\s*(.*)': 15818.0,
 '_Z6KernelP4NodePiPbS2_S2_S1_PTX\\s+Total_core_cache_stats_breakdown\\[GLOBAL_ACC_R\\]\\[HIT\\]\\s*=\\s*(.*)': 56226.0,
 '_Z6KernelP4NodePiPbS2_S2_S1_PTX\\s+Total_core_cache_stats_breakdown\\[GLOBAL_ACC_R\\]\\[MSHR_HIT\\]\\s*=\\s*(.*)': 1630.0,
 '_Z6KernelP4NodePiPbS2_S2_S1_PTX\\s+Total_core_cache_stats_breakdown\\[GLOBAL_ACC_R\\]\\[TOTAL_AC

In [31]:
def print_stat(stat_name, all_named_kernels, apps_and_args, configs, stat_map, cfg_as_rows, do_averages):
    csv_str = ""
    DIVISION = "-" * 100
    if cfg_as_rows:
        num_commas = len(apps_and_args)
    else:
        num_commas = len(configs)
    if do_averages:
        num_commas += 1
    csv_str += DIVISION + ("," * num_commas) + "\n"

    running_total = 0
    total_num = 0
    if cfg_as_rows:
        csv_str += stat_name + ("," * num_commas) +  "\nCFG,"
        for appargs in apps_and_args:
            knames = all_named_kernels[appargs]
            for kname in knames:
                if kname == "":
                    continue
                csv_str += appargs + "--" + kname + ","
        if do_averages:
            csv_str += "AVG,"

        csv_str = csv_str[:-1]
        csv_str += "\n"
        for config in configs:
            csv_str += config + ","
            for appargs in apps_and_args:
                knames = all_named_kernels[appargs]
                for kname in knames:
                    if kname == "":
                        continue
                    if kname + appargs + config + stat_name in stat_map:
                        csv_str += str(stat_map[kname + appargs + config + stat_name]) + ","
                        try:
                            running_total += float(stat_map[kname + appargs + config + stat_name])
                            total_num += 1
                        except:
                            pass
                    else:
                        csv_str += "NA,"
            if do_averages:
                if total_num != 0:
                    csv_str += "{0:.1f},".format(running_total/total_num)
                else:
                    csv_str += "NA,"
            running_total = 0
            total_num = 0
            csv_str = csv_str[:-1]
            csv_str += "\n"

    else:
        csv_str += stat_name + ("," * num_commas) + "\nAPPS,"
        for config in configs:
            csv_str += config + ","

        if do_averages:
            csv_str += "AVG,"
        csv_str = csv_str[:-1]
        csv_str += "\n"
        for appargs in apps_and_args:
            knames = all_named_kernels[appargs]
            for kname in knames:
                if kname == "":
                    continue
                csv_str += appargs + "--" + kname + ","
                for config in configs:
                    if kname + appargs + config + stat_name in stat_map:
                        csv_str += str(stat_map[kname + appargs + config + stat_name]) + ","
                        try:
                            running_total += float(stat_map[kname + appargs + config + stat_name])
                            total_num += 1
                        except:
                            pass
                    else:
                        csv_str += "NA,"

                if do_averages:
                    if total_num != 0:
                        csv_str += "{0:.1f},".format(running_total/total_num)
                    else:
                        csv_str += "NA,"
                running_total = 0
                total_num = 0
                csv_str = csv_str[:-1]
                csv_str += "\n"

    csv_str = csv_str[:-1]
    csv_str += "\n"
    return csv_str

In [32]:
import io
stat_csv_file = io.StringIO()

for stat_name in ( stats_yaml['collect_aggregate'] +\
                   stats_yaml['collect_abs'] +\
                   stats_yaml['collect_rates'] ):
    stat_csv_string = print_stat(stat_name, all_named_kernels, apps_and_args, configs, stat_map,
                           get_stats_options.configs_as_rows, get_stats_options.do_averages )
    stat_csv_file.write(stat_csv_string)

In [33]:
import csv
import copy
import numpy as np
import plotly
import plotly.graph_objs as go
#import chart_studio.plotly as py
import plotly.io as pio

In [34]:
class PlotCorrelationOptions:
    hardware_dir = run_dir = os.path.join(this_directory, "../../analyze/hw_run_with_cycles/")
    data_mappings = ""
    blacklist = ""
    hardware_dict = None
    err_off = False
    rename_data = ""
    marker_order = ""
    plotname = ""
    noanno = False
    legend = 1.2

plot_correl_options = PlotCorrelationOptions()

In [35]:
plot_correl_options.hardware_dir = dir_option_test( plot_correl_options.hardware_dir, "../../hw_run/", this_directory )
plot_correl_options.data_mappings = file_option_test( plot_correl_options.data_mappings, "correl_mappings.py", this_directory )
plot_correl_options.blacklist = file_option_test( plot_correl_options.blacklist, "", this_directory )

In [36]:
def parse_hw_csv_2(csv_file, hw_data, appargs, kdata):
    cfg = None

    with open(csv_file, 'rU') as data_file:
        print("Parsing HW csv file {0}".format(csv_file))
        reader = csv.reader(data_file)        # define reader object
        state = "start"
        header = []
        kcount = 0
        for row in reader:                    # loop through rows in csv file
            if state == "start":
                if len(row) == 0:
                    continue
                if "ID" == row[0]:
                    state = "kernel_proc"
                continue
            if state == "kernel_proc":
                if len(row) == 1:
                    print("Bad line - possibly the app failed -- {0}".format(row))
                    break
                metric = row[-3]
                if metric == "device__attribute_display_name":
                    cfg = row[-1]
                    continue

                try:
                    value = float(row[-1].replace(",",""))
                except ValueError:
                    print("Bad line - possibly the app failed -- {0}".format(row))
                    continue

                if len(kdata) <= kcount:
                    kdata.append({})
                if metric not in kdata[kcount]:
                    kdata[kcount][metric] = []
                kdata[kcount][metric].append(value)
                kcount = int(row[0]) + 1

        print("Kernels found: {0}".format(kcount))

    if cfg != "" and cfg != None:
        cfg = re.sub(r" \(\d+\)$","", cfg) # Strip off the (0), (1), etc that some profiler versions put on the end of the device name
        if cfg not in hw_data:
            hw_data[cfg] = {}
        hw_data[cfg][appargs] = kdata

In [39]:
def parse_hw_csv(csv_file, hw_data, appargs, kdata):
    cfg = ""
    cfg_col = None

    with open(csv_file, 'rU') as data_file:
        print("Parsing HW csv file {0}".format(csv_file))
        reader = csv.reader(data_file)        # define reader object
        state = "start"
        header = []
        kcount = 0
        for row in reader:
            # Begin by searching for the text line that indicates the beginning of the profile dump
            if state == "start" and len(row) > 0:
                if "Profiling result" in row[0] or "==PROF== Disconnected" in row[0]:
                    state = "header_proc"
                continue

            # The frist line is a header line what indicates the place of each stat on the next line
            if state == "header_proc":
                # EDIT EDIT EDIT
                if len(row) < 1:
                    # nothing was profiled (more precisely: nsight does not support my GPU)
                    continue
                if "Event result" in row[0]:
                    continue
                header = row
                count = 0

                # get the device name column - which is a special attribute
                for elem in row:
                    if elem == "Device":
                        cfg_col = count
                    elif elem == "device__attribute_display_name":
                        cfg_col = count
                    count += 1

                state = "kernel_proc"
                continue

            # The next sequence of lines are all the kernel launches with the values for each stat
            if state == "kernel_proc":
                if len(row) == 1:
                    print("Bad line - possibly the app failed -- {0}".format(row))
                    break

                # skip the memcopies
                if "[CUDA " in "".join(row):
                    continue

                if cfg_col == None:
                    continue

                # Skip lines without a device listed
                if row[cfg_col] == "":
                    continue

                # Set the Device
                if cfg != "" and cfg != row[cfg_col]:
                    print("data for more than one device in {0}..{1}:{2}"\
                        .format(csv_file,cfg,elem))
                    exit()

                cfg = row[cfg_col]
                count = 0
                if len(kdata) <= kcount:
                    kdata.append({})
                for elem in row:
                    if header[count] not in kdata[kcount]:
                        kdata[kcount][header[count]] = []
                    try:
                        value = float(elem.replace(",",""))
                        kdata[kcount][header[count]].append(value)
                    except ValueError:
                        if "n/a" != elem:
                            kdata[kcount][header[count]].append(elem)
                    count += 1
#                logger.log("Kernel Launch {0}: HW Kernel found".format(kcount))
                kcount += 1
                continue
        print("Kernels found: {0}".format(kcount))
    if cfg != "" and cfg != None:
        cfg = re.sub(r" \(\d+\)$","", cfg) # Strip off the (0), (1), etc that some profiler versions put on the end of the device name
        if cfg not in hw_data:
            hw_data[cfg] = {}
        hw_data[cfg][appargs] = kdata

In [40]:
hw_data = {}
if plot_correl_options.hardware_dict == None:
    for root, dirs, files in os.walk(plot_correl_options.hardware_dir):
        for d in dirs:
            csv_dir = os.path.join(root, d)
            csvs = sorted(glob.glob(os.path.join(csv_dir,"*.csv*")))
            # latest_date = re.search("(.*).csv*",os.path.basename(csvs[-1])).group(1)
            # csvs = glob.glob(os.path.join(csv_dir,"{0}.csv*".format(latest_date)))
            # print("For {0}: Using Date: [{1}]. Containd {2} files\n".format(csv_dir, latest_date, len(csvs)))
            if len(csvs) == 0:
                continue
            # if "bfs-rodinia" in csv_dir:
            #     pprint(csvs)
            kdata = []
            for csvf in csvs:
                if "gpc__cycles_elapsed" in csvf:
                    parse_hw_csv_2(csvf,hw_data, os.path.join(os.path.basename(root),d), kdata)
                else:
                    parse_hw_csv(csvf,hw_data, os.path.join(os.path.basename(root),d), kdata)

# workaround: pretend we use a "TITAN V"
real_gpu = 'NVIDIA GeForce GTX 1080'
gpu = "TITAN V"
hw_data = {
    gpu: hw_data[real_gpu],
}

Parsing HW csv file /Users/roman/dev/gpgpusims/notebooks/gpgpusim/../../analyze/hw_run_with_cycles/device-0/10.1/srad_v2-rodinia-2.0-ft/__data_matrix128x128_txt_0_127_0_127__5_2___data_result_matrix128x128_1_150_1_100__5_2_txt/22.11.30-Wednesday--21:49:00.csv
Kernels found: 4
Parsing HW csv file /Users/roman/dev/gpgpusims/notebooks/gpgpusim/../../analyze/hw_run_with_cycles/device-0/10.1/srad_v2-rodinia-2.0-ft/__data_matrix128x128_txt_0_127_0_127__5_2___data_result_matrix128x128_1_150_1_100__5_2_txt/22.11.30-Wednesday--21:49:00.csv.cycle.0
Kernels found: 4
Parsing HW csv file /Users/roman/dev/gpgpusims/notebooks/gpgpusim/../../analyze/hw_run_with_cycles/device-0/10.1/srad_v2-rodinia-2.0-ft/__data_matrix128x128_txt_0_127_0_127__5_2___data_result_matrix128x128_1_150_1_100__5_2_txt/22.11.30-Wednesday--21:49:00.csv.elapsed_cycles_sm.0
Kernels found: 4
Parsing HW csv file /Users/roman/dev/gpgpusims/notebooks/gpgpusim/../../analyze/hw_run_with_cycles/device-0/10.1/srad_v2-rodinia-2.0-ft/__dat

In [41]:
# pprint(hw_data)
pprint(list(hw_data[gpu].keys()))
# pprint(hw_data[gpu]["bfs-rodinia-2.0-ft/__data_graph4096_txt___data_graph4096_result_txt"])

['srad_v2-rodinia-2.0-ft/__data_matrix128x128_txt_0_127_0_127__5_2___data_result_matrix128x128_1_150_1_100__5_2_txt',
 'backprop-rodinia-2.0-ft/4096___data_result_4096_txt',
 'hotspot-rodinia-2.0-ft/30_6_40___data_result_30_6_40_txt',
 'bfs-rodinia-2.0-ft/__data_graph4096_txt___data_graph4096_result_txt',
 'nw-rodinia-2.0-ft/128_10___data_result_128_10_txt',
 'pathfinder-rodinia-2.0-ft/1000_20_5___data_result_1000_20_5_txt',
 'nn-rodinia-2.0-ft/__data_filelist_4_3_30_90___data_filelist_4_3_30_90_result_txt',
 'heartwall-rodinia-2.0-ft/__data_test_avi_1___data_result_1_txt',
 'lud-rodinia-2.0-ft/_v__b__i___data_64_dat',
 'streamcluster-rodinia-2.0-ft/3_6_16_1024_1024_100_none_output_txt_1___data_result_3_6_16_1024_1024_100_none_1_txt']


In [42]:
def get_sim_csv_data(data_file):
    all_kerns = {}
    all_kern_cfg = {}
    stats_missing = set()
    klist = []
    # with open(filepath, 'r') as data_file:
    data_file.seek(0)
    reader = csv.reader(data_file)        # define reader object
    state = "start"
    first_stat = True
    for row in reader:                    # loop through rows in csv file
        # print(row)
        if len(row) != 0 and row[0].startswith("----"):
            state = "find-stat"
            continue
        if state == "find-stat":
            current_stat = row[0]
            print("Processing Stat {0}".format(current_stat))
            state = "find-apps"
            continue
        if state == "find-apps":
            if first_stat:
                last_appargs = ""
                for item in row[1:]:
                    split = item.split("--")
                    if len(split) > 1:
                        appargs = split[0]
                        kname = split[1]
                        if len(split) > 2:
                            kname += "--" + split[2]
                        if kname == "all_kernels":
                            continue
                        if appargs == last_appargs:
                            num += 1
                        else:
                            last_appargs = appargs
                            num = 0
                        print("Found appargs {0}. Kernel {1}".format(appargs,kname))
                        klist.append((appargs, kname, num))
                        if appargs not in all_kern_cfg:
                            all_kern_cfg[appargs] = []
                        all_kern_cfg[appargs].append({})
                        all_kern_cfg[appargs][-1]["Kernel"] = kname
            if not kname == "all_kernels":
                state = "process-cfgs"
            continue
        if state == "process-cfgs":
            if len(row) == 0:
                first_stat = False
                state = "start"
                continue
            count = 0
            cfg = row[0]
            print("Processing config: {0}".format(cfg))
            if cfg not in all_kerns:
                all_kerns[cfg] = copy.deepcopy(all_kern_cfg)
            for x in row[1:]:
                if count < len(klist):
                    appargs,kname,num = klist[count]
                else:
                    print("More row entries than kernels processed. Likely because of an average column", file=sys.stderr)
                    break
                try:
                    all_kerns[cfg][appargs][num][current_stat] = float(x)
                except ValueError:
                    all_kerns[cfg][appargs][num][current_stat] = None
                    stats_missing.add((appargs, num, current_stat))
                count += 1
    for stat in stats_missing:
        appargs, num, current_stat = stat
        for cfg in all_kerns.keys():
            del all_kerns[cfg][appargs][num][current_stat]
    return all_kerns

In [43]:
sim_data = get_sim_csv_data(stat_csv_file) # options.csv_file)

Processing Stat gpu_tot_sim_insn\s*=\s*(.*)
Found appargs backprop-rodinia-2.0-ft/4096___data_result_4096_txt. Kernel _Z22bpnn_layerforward_CUDAPfS_S_S_ii
Found appargs backprop-rodinia-2.0-ft/4096___data_result_4096_txt. Kernel _Z24bpnn_adjust_weights_cudaPfiS_iS_S_
Found appargs bfs-rodinia-2.0-ft/__data_graph4096_txt___data_graph4096_result_txt. Kernel _Z6KernelP4NodePiPbS2_S2_S1_i
Found appargs bfs-rodinia-2.0-ft/__data_graph4096_txt___data_graph4096_result_txt. Kernel _Z7Kernel2PbS_S_S_i
Found appargs hotspot-rodinia-2.0-ft/30_6_40___data_result_30_6_40_txt. Kernel _Z14calculate_tempiPfS_iiiiffffff
Found appargs heartwall-rodinia-2.0-ft/__data_test_avi_1___data_result_1_txt. Kernel _Z6kernelv
Found appargs lud-rodinia-2.0-ft/_v__b__i___data_64_dat. Kernel _Z12lud_diagonalPfii
Found appargs lud-rodinia-2.0-ft/_v__b__i___data_64_dat. Kernel _Z13lud_perimeterPfii
Found appargs lud-rodinia-2.0-ft/_v__b__i___data_64_dat. Kernel _Z12lud_internalPfii
Found appargs nw-rodinia-2.0-ft/128_1

In [44]:
# pprint(sim_data)

In [45]:
exec(open(plot_correl_options.data_mappings,'r').read())
assert correl_list is not None

pprint(config_maps)

{'GTX1080Ti': {'GeForce GTX 1080 Ti'},
 'GTX480': {'GeForce GTX 480'},
 'P100_HBM': {'Tesla P100'},
 'QV100': {'NVIDIA GeForce GTX 1080',
           'Quadro GV100',
           'TITAN V',
           'Tesla V100-SXM2-32GB'},
 'RTX2060': {'GeForce RTX 2060'},
 'RTX3070': {'GeForce RTX 3070'},
 'TITANK': {'GeForce GTX TITAN'},
 'TITANX': {'TITAN X (Pascal)'}}


In [46]:
blacklist = []
def isAppBanned( appargs, blacklist ):
    for bannedname in blacklist:
        if bannedname.match(appargs):
            return True
    return False

In [55]:
fig_data = {} # map of HW config to a list of scatters

for cfg, sim_for_cfg in sim_data.items():
    if cfg.split('-')[0] not in config_maps:
        print("cfg {0} not in config_maps:{1}.".format(cfg, config_maps))
        continue

    hw_cfg = None
    # pprint(config_maps)
    for device in hw_data.keys():
        print("Testing hw_cfg={0}".format(device))
        print("\tcfg={0} key={1}".format(cfg, cfg.split('-')[0]))

        # Each simulation config name maps to potentially more than one NVIDIA card name
        # we iterate through them all here.
        for hw_name in config_maps[cfg.split('-')[0]]:
            if hw_name in device:
                hw_cfg = device
                print("Setting hw_cfg={0}".format(device))
                continue

    if hw_cfg == None:
        print("Cannot find HW data for {0} skipping plots.".format(hw_cfg))
        continue
        
    for correl in correl_list:
        if correl.hw_name != "all" and correl.hw_name not in hw_cfg:
            # print("for cfg:{0}, hw_cfg:{1} - Skipping plot:\n{2}".format(cfg, hw_cfg, correl))
            continue
        
        print("######### CORREL: ", correl.chart_name)

        hw_array = []
        hw_error = []
        hw_error_min = []
        sim_array = []
        label_array = []
        color_array = []
        
        
        appcount = 0
        kernelcount = 0
        num_less_than_one_percent = 0
        num_less_than_ten_percent = 0
        num_under = 0
        num_over = 0
        errs = []
        sim_appargs_leftover = set(copy.deepcopy(list(sim_for_cfg.keys())))
        hw_appargs_leftover = set(copy.deepcopy(list(hw_data[hw_cfg].keys())))
        max_axis_val = 0.0
        min_axis_val = 99999999999999999999999999999.9
        err_dropped_stats = 0
        hw_low_drop_stats = 0
        apps_included = {}
        for _idx, (appargs, sim_klist) in enumerate(sim_for_cfg.items()):
            if appargs in hw_data[hw_cfg]:
                if (isAppBanned( appargs, blacklist )):
                    continue

                hw_klist = hw_data[hw_cfg][appargs]
                if _idx == 0:
                    # pprint(hw_klist)
                    # pprint(sim_klist)
                    pass
                processAnyKernels = False
                if len(sim_klist) <= len(hw_klist):
                    # print("Found hw/sim match for {0}. Sim={1}. HW={2}"
                    #     .format(appargs, len(sim_klist), len(hw_klist)))
                    sim_appargs_leftover.remove(appargs)
                    hw_appargs_leftover.remove(appargs)
                    count = 0
                    for sim in sim_klist:
                        hw = hw_klist[count]
                        try:
                            # print("Evaluating HW: {0}".format(correl.hw_eval))
                            hw_array.append(eval(correl.hw_eval))
                        except:
                            e = sys.exc_info()[0]
                            print("Potentially uncollected stat in {0}.Error: {1}".format(correl.hw_eval, e))
#                           print hw
#                           exit(1)
                            count += 1
                            continue

                        if hw_array[-1] < correl.drophwnumbelow:
                            hw_low_drop_stats += 1
                            hw_array = hw_array[:-1]
                            count += 1
                            continue

                        try:
                            sim_array.append(eval(correl.sim_eval))
                        except KeyError as e:
                            print("Potentially uncollected stat in {0}.Error: {1}".format(correl.sim_eval, e))
                            hw_array = hw_array[:-1]
                            count += 1
                            continue
                        except ZeroDivisionError as e:
                            print("Division by zero for  stat in {0}. Error: {1}".format(correl.sim_eval, e))
                            count += 1
                            hw_array = hw_array[:-1]
                            continue
 
                        if correl.hw_error != None:
                            maxe,mine = eval(correl.hw_error)
                            hw_error.append(maxe)
                            hw_error_min.append(mine)
                        else:
                            hw_error.append(0)
                            hw_error_min.append(0)


                        if appargs not in apps_included:
                            apps_included[appargs] = [];

                        processAnyKernels = True
                        err = 99999
                        hw_high = 0
                        hw_low = 999999999999

                        kernelcount += 1
                        err = sim_array[-1] - hw_array[-1]

                        if hw_array[-1] != 0:
                            hw_high = (hw_error[-1]/hw_array[-1]) * 100
                            hw_low = (hw_error_min[-1]/hw_array[-1]) * 100
                            err = (err / hw_array[-1]) * 100
                        else:
                            hw_high = 0
                            hw_low = 0
                            err = 0


                        if abs(err) < 10.0:
                            num_less_than_ten_percent += 1

                        if abs(err) < 1.0:
                            num_less_than_one_percent += 1

                        elif err > 0:
                            num_over += 1
                        else:
                            num_under += 1

                        errs.append(abs(err))
                        apps_included[appargs].append((err, sim_klist[count]["Kernel"]))

                        label_array.append((appargs + "--" + sim_klist[count]["Kernel"]) +
                            " (Err={0:.2f}%,HW-Range=+{1:.2f}%/-{2:.2f}%)".format(err, hw_high,hw_low))
                        count += 1
                        if hw_array[-1] > max_axis_val:
                            max_axis_val = hw_array[-1]
                        if sim_array[-1] > max_axis_val:
                            max_axis_val = sim_array[-1]

                        if hw_array[-1] < min_axis_val:
                            min_axis_val = hw_array[-1]
                        if sim_array[-1] < min_axis_val:
                            min_axis_val = sim_array[-1]

                else:
                    print("For appargs={0}, HW/SW kernels do not match HW={1}, SIM={2} and software has more than hardware\n"\
                        .format(appargs, len(hw_klist), len(sim_klist)))
                if processAnyKernels:
                    appcount += 1
                    
        # print("Sim apps no HW:\n{0}\nHW apps no sim data:\n{1}"\
        #     .format(sim_appargs_leftover, hw_appargs_leftover))

        # these are not bad errors but error bars?
        # this is needed to avoid divide by zero ...
        if len(errs) == 0:
            # raise ValueError(errs)
            print("bailing", correl.chart_name)
            continue

        for i in range(len(hw_array)):
            if hw_array[i] != 0:
                hw_high = (hw_error[i]/hw_array[i]) * 100
                hw_low = (hw_error_min[i]/hw_array[i]) * 100
            else:
                hw_high = 0
                hw_low = 0

        print(hw_array)
        print(sim_array)
        correl_co = np.corrcoef(hw_array, sim_array)[0][1]
        avg_err = 0
        for err in errs:
            avg_err += err
        avg_err = avg_err / len(errs)
        
        trace = go.Scatter(
            x = hw_array,
            y = sim_array,
            mode = 'markers',
            text=label_array,
            error_x=dict(
                type='data',
                symmetric=False,
                array=hw_error,
                arrayminus=hw_error_min,
                visible=True
            ),
            name=cfg,
        )
        if not plot_correl_options.err_off:
            anno = cfg + " ({0} apps, {1} kernels ({6} < 1% Err, {4} under, {5} over)) [Correl={2:.4} Err={3:.2f}%]"\
                .format(appcount, kernelcount,correl_co, avg_err,num_under,num_over,num_less_than_one_percent, num_less_than_ten_percent)
        else:
            anno = cfg + " ({0} apps, {1} kernels ({6} < 1% Err, {4} under, {5} over)) [Correl={2:.4}]"\
                .format(appcount, kernelcount,correl_co, avg_err,num_under,num_over,num_less_than_one_percent, num_less_than_ten_percent)

        layout = go.Layout(
            title=correl.chart_name,
             xaxis=dict(
                title='Hardware {1}'.format(hw_cfg, correl.chart_name),
                range=[min_axis_val * 0.9 ,max_axis_val*1.1]
            ),
            yaxis=dict(
                title='Simulation {0}'.format(correl.chart_name),
                range=[min_axis_val * 0.9 ,max_axis_val*1.1]
            ),
        )

        data = [trace]

        if (correl.plotfile, hw_cfg) not in fig_data:
            fig_data[ (correl.plotfile, hw_cfg) ] = []
        fig_data[ (correl.plotfile, hw_cfg) ].append((trace, layout, cfg, anno, correl.plotfile, err_dropped_stats, apps_included, correl, hw_low_drop_stats))
        
# pprint(fig_data)

Testing hw_cfg=TITAN V
	cfg=QV100-PTX key=QV100
Setting hw_cfg=TITAN V
######### CORREL:  Cycles
[8678.4, 13958.4, 11289.599999999999, 2572.8, 15590.400000000001, 13036.800000000001, 19776.0, 9580.8, 3724.8, 9619.2, 9542.4, 8947.199999999999, 5894.4, 9351.0, 5414.400000000001, 7584.0]
[8325.0, 7700.0, 74677.0, 44871.0, 61078.0, 8907.0, 108884.0, 60819.0, 19657.0, 109080.0, 95515.0, 28190.0, 32617.0, 15958.0, 15462.0, 1807695.0]
######### CORREL:  GPC Cycles
Potentially uncollected stat in np.average(hw["gpc__cycles_elapsed.avg"]).Error: <class 'KeyError'>
Potentially uncollected stat in np.average(hw["gpc__cycles_elapsed.avg"]).Error: <class 'KeyError'>
Potentially uncollected stat in np.average(hw["gpc__cycles_elapsed.avg"]).Error: <class 'KeyError'>
Potentially uncollected stat in np.average(hw["gpc__cycles_elapsed.avg"]).Error: <class 'KeyError'>
Potentially uncollected stat in np.average(hw["gpc__cycles_elapsed.avg"]).Error: <class 'KeyError'>
Potentially uncollected stat in np.ave

In [49]:
# pprint(fig_data)

In [50]:
def make_anno1(text, fontsize, x, y):
    return plotly.graph_objs.layout.Annotation(
        text=text,   # annotation text
        xref='paper',  # use paper coordinates
        yref='paper',  #   for both x and y coords
        x=x,           # x and y position 
        y=y,           #   in norm. coord. 
        # font=Font(size=fontsize,color='Black'),  # text font size
        showarrow=False,       # no arrow (default is True)
        bgcolor='#F5F3F2',     # light grey background color
        bordercolor='#FFFFFF', # white borders
        borderwidth=1,         # border width
        borderpad=5     # set border/text space to 1 fontsize
    )

def make_pretty_app_list(apps_included):
    ret_str = "Application + Arguments :: Number Kernels Launched :: Average Error\n\n"
    kernel_str = "Application + Arguments :: kname :: Error vs. hardware\n\n"
    app_list = apps_included.keys()
    app_list = sorted(app_list, key=lambda x: len(apps_included[x]), reverse=True)
    for app in app_list:
        if len(apps_included[app]) > 0:
            avg_err = 0.0
            for err,name in apps_included[app]:
                avg_err += abs(err)
                kernel_str += "{0} :: {1} :: {2:.2f}%\n".format(app, name, err)
            avg_err = avg_err / len(apps_included[app])
            ret_str += "{0} :: {1} :: {2:.2f}%\n".format(app, len(apps_included[app]), avg_err)
        else:
            ret_str += "{0} :: No kernels included in error calc".format(app)
            kernel_str += "{0} :: No kernels included in error calc".format(app)
    return ret_str, kernel_str

def getCorrelCsvRaw(anno, names, x, y):
    out_csv = anno + "\n"
    out_csv += "Name,Hardware,Simulator,Sim/HW\n"
    count = 0
    for k in names:
        out_csv += "{0},{1:.2f},{2:.2f},{3:.2f}\n"\
            .format(k, x[count], y[count],\
                y[count]/(x[count]+0.000001))
        count += 1
    return out_csv

def getAppData(kernels, x, y, xaxis_title, correlmap):
    count = 0
    appmax = 0
    appmin = 99999999999999999999.9
    app_map = {}
    apps = []
    newx = []
    newy = []
    total_err = 0.0
    num_over = 0
    num_under = 0
    num_less_than_one_percent = 0
    num_less_than_ten_percent = 0
    for kernel in kernels:
        app_name = kernel.split("--")[0]
        if app_name in app_map:
            tx,ty,oldk = app_map[app_name]
            app_map[app_name] = ( tx + x[count], ty + y[count],oldk+1 )
        else:
            app_map[app_name] = (x[count], y[count],1)
        count += 1
    tot_err_num = 0
    tot_x = 0
    rpds = []
    mse_num = 0

    # For rates, take the average across all the kernels in the app
    if correlmap.stattype == "rate":
        new_map = {}
        for k,v in app_map.items():
            x1,y1,numk = v
            new_map[k] = (x1 / numk, y1 / numk, numk)
        app_map = new_map

    for k,v in app_map.items():
        apps.append(k)
        x1,y1,numk = v
        newx.append(x1)
        newy.append(y1)
        tot_err_num += abs(y1 - x1)
        tot_x += x1
        if (y1 > x1):
            num_over += 1
        elif (y1 < x1):
            num_under += 1
        err = abs(y1-x1) / (x1+0.0000001) * 100
        mse_num += (y1-x1)**2
        total_err += err
        if x1 + y1 == 0:
            rpds.append(0)
        else:
            rpds.append(abs(y1-x1) /(x1 + y1) * 2)
        if err < 1.0:
            num_less_than_one_percent += 1
        if err < 10.0:
            num_less_than_ten_percent += 1
    

    total_err = total_err / len(newx)
    if tot_x > 0:
        aggregate_err = tot_err_num / tot_x * 100
    else:
        aggregate_err = 0
    correl_co = np.corrcoef(newx, newy)[0][1]
    tot_rpd = 0
    for num in rpds:
        tot_rpd += num
    rmse = (math.sqrt(mse_num/(len(newx))))/((tot_x+0.000001)/len(newx))
    ret_rpd = (tot_rpd/len(rpds))*100

    return apps, newx, newy, total_err, correl_co, num_over, num_under, num_less_than_one_percent, aggregate_err, ret_rpd,\
        num_less_than_ten_percent,rmse

def make_submission_quality_image(
    image_type, traces, hw_cfg, outdir,
    font_size=25, tick_font_size=15, width=1500, height=1500
):
    kernel_data = []
    app_data = []
    app_min = 999999999999999999999999999999999.9
    app_max = 0
    markers =[dict(size = 10, color = 'rgba(0, 0, 200, .5)'),
              dict(size = 10, color = 'rgba(0, 0, 0, .5)'),
              dict(size = 10, color = 'rgba(0, 182, 0, .4)'),
              dict(size = 10,color = 'rgba(0, 0, 193, .9)'),
              dict(size = 10,color = 'rgba(155, 155, 155, .9)')]
    marker_sym =['x', 'circle', 'circle', 'circle', 'circle']
    count = 0
    kernel_annotations = []
    app_annotations = []
    agg_cfg = ""
    print_anno = ""
    applist_file_contents = ""
    kernellist_file_contents = ""
    kernel_csv_file_contents = ""
    app_csv_file_contents = ""
    
    renames = plot_correl_options.rename_data.split(',')
    marker_order = plot_correl_options.marker_order.split(',')
    
    pprint(traces)
    for trace, layout, cfg, anno, plotfile, err_dropped, apps_included, correlmap, hw_low_drop in traces:
        if len(traces) == len(renames):
            trace.name = renames[count]
        if count < len(marker_order) and marker_order[0] != "":
            trace.marker = markers[int(marker_order[count])]
            trace.marker.symbol = marker_sym[int(marker_order[count])]
        else:
            trace.marker = markers[count %len(markers)]
            trace.marker.symbol = marker_sym[count %len(markers)]
        trace.mode = "markers"
        trace.error_x.color = trace.marker.color
        
        # Set the alpha on the error bars to be 30%
        trace.error_x.color =  re.sub(r"(,.*,.*),.*\)",r"\1,0.3)", trace.error_x.color)
        kernel_annotations.append(make_anno1(anno,22,0,1.115 - count * 0.05))
        print_anno += anno + " :: {0} high error points dropped from Err calc. {1} dropped for HW too low (>{2})\n".format(
            err_dropped, hw_low_drop, correlmap.drophwnumbelow)
        agg_cfg += "." + cfg
        app_str, kernel_str = make_pretty_app_list(apps_included)
        kernellist_file_contents += "{0}\n{1}\n\n".format(anno, kernel_str)
        kernel_csv_file_contents += "{0}\n\n"\
            .format(getCorrelCsvRaw(anno, trace.text, trace.x, trace.y))

        apps,appx,appy,avg_err,correl_co,num_over,num_under,num_less_than_one_percent,agg_err,rpd,nltenp,nmse \
            = getAppData(trace.text, trace.x, trace.y,layout.xaxis.title, correlmap)

        app_max = max ( app_max, max(appx), max(appy) )
        app_min = min ( app_min, min(appx), min(appy) )

        app_csv_file_contents += "{0}\n\n"\
            .format(getCorrelCsvRaw( anno, apps,appx,appy ))
        kernel_data.append(trace)

        app_anno = cfg + " ({0} apps ({5} < 1% Err, {3} under, {4} over, {8} < 10% Err)) [Correl={1:.4} Err={2:.2f}% Agg_Err={6:.2f}% RPD={7:.2f}%,NMSE={9:.2f}]"\
            .format(len(apps), correl_co, avg_err,num_under,num_over,num_less_than_one_percent,agg_err,rpd,nltenp,nmse)
        applist_file_contents += "{0}\n{1}\n\n".format(app_anno, app_str)
        applist_file_contents += "apps:\n{0}\n\n".format(apps)
        app_annotations.append(make_anno1(app_anno,22,0,1.115 - count * 0.05))
        print_anno += "Per-App :: " + app_anno + "\n"

        if "Cycles" in layout.xaxis.title.text or "IPC" in layout.xaxis.title.text or "Instructions" in layout.xaxis.title.text:
            name_text = "<b>" + trace.name + " [Correl={0:.2} MAE={1:.0f}%]</b>".format(correl_co, avg_err, nmse)
        else:
            name_text = "<b>" + trace.name + " [Correl={0:.2} NRMSE={1:.2f}]</b>".format(correl_co, nmse)

        app_trace = go.Scatter(
            x = appx,
            y = appy,
            mode = 'markers',
            text=apps,
            marker = trace.marker,
            name=name_text,
        )
        app_data.append(app_trace)
        count += 1

    if not plot_correl_options.noanno:
        layout.annotations=kernel_annotations
    if plot_correl_options.plotname == "":
        plotname = plotfile + agg_cfg
    else:
        plotname = plotfile + "." + plot_correl_options.plotname

    plotname = os.path.join(outdir, plotname)[:200]

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    f = open(plotname + ".apps.txt", 'w')
    f.write(applist_file_contents)
    f.close()
    f = open(plotname + ".kernel.txt", 'w')
    f.write(kernellist_file_contents)
    f.close()
    f = open(plotname + ".kernel.raw.csv", 'w')
    f.write(kernel_csv_file_contents)
    f.close()
    f = open(plotname + ".app.raw.csv", 'w')
    f.write(app_csv_file_contents)
    f.close()

    print("Plotting {0} : [{1}]\n{2}"\
        .format(hw_cfg, layout.title.text, print_anno))

    png_layout = copy.deepcopy(layout)
    png_layout.title=None
#    for anno in png_layout.annotations:
#        anno.font= Font(size=22,color='black')
    png_layout.xaxis.titlefont.size = font_size
    png_layout.xaxis.titlefont.color='black'
    png_layout.xaxis.tickfont.size=tick_font_size
    png_layout.xaxis.tickfont.color='black'
    png_layout.xaxis.type=correlmap.plottype
    png_layout.xaxis.autorange=True

    png_layout.yaxis.titlefont.size = font_size
    png_layout.yaxis.tickfont.size = tick_font_size
    png_layout.yaxis.titlefont.color='black'
    png_layout.yaxis.tickfont.color='black'
    png_layout.yaxis.type=correlmap.plottype
    png_layout.yaxis.autorange=True

    png_layout.margin.t = 100

    png_layout.legend=dict(
        x=-.2,
        y=1.2,
        traceorder='normal',
        font=dict(
            family='sans-serif',
            size=font_size,
            color='#000'
        ),
        bgcolor='#E2E2E2',
        bordercolor='#FFFFFF',
        borderwidth=2
   )
    xyline = go.Scatter(x=[layout.xaxis.range[0] + 1, layout.xaxis.range[1]],
        y=[layout.xaxis.range[0] + 1,layout.xaxis.range[1]],showlegend=False,mode="lines")
    xyline.line.color = 'rgba(255,0,0,.7)'
    kernel_data.append(xyline)

    if "linear" == correlmap.plottype:
        app_min = -5
        legypos = float(plot_correl_options.legend)*.95
        legxpos = -.25
        axisrange=[app_min*.95, app_max*1.05]
    else:
        legypos = float(plot_correl_options.legend)
        legxpos = -.35
        if app_min == 0:
            rmin = np.log10(0.9)
        else:
            rmin = np.log10(app_min)*.98
        if app_max == 0:
            rmax = 0
        else:
            rmax = np.log10(app_max)*1.02
        axisrange=[rmin, rmax]

    app_layout = go.Layout(
        title="Per App " + layout.title.text,
        xaxis=dict(
            title=layout.xaxis.title,
            range=axisrange,
            gridcolor="rgba(128,128,128,.4)",
            zerolinecolor="rgba(128,128,128,.4)"
        ),
        yaxis=dict(
            title=layout.yaxis.title,
            range=axisrange,
            gridcolor="rgba(128,128,128,.4)",
            zerolinecolor="rgba(128,128,128,.4)"
        ),
        legend=dict(
            x=legxpos,
            y=legypos,
            traceorder='normal',
            font=dict(
                family='sans-serif',
                size=font_size,
                color='#000'
            ),
            bgcolor='rgba(255,255,255,0.0)',
            bordercolor='rgba(255,255,255,0.0)',
            borderwidth=2
        ),
        plot_bgcolor='#FFFFFF',
        # autosize=False,
        # width=2000,
        # height=2000,
   )

    app_layout.title=None
    app_layout.xaxis.titlefont.size = font_size
    app_layout.xaxis.titlefont.color='black'
    app_layout.xaxis.tickfont.size=tick_font_size
    app_layout.xaxis.tickfont.color='black'
    app_layout.xaxis.type=correlmap.plottype
#    app_layout.xaxis.autorange=True

    app_layout.yaxis.titlefont.size = font_size
    app_layout.yaxis.tickfont.size = tick_font_size
    app_layout.yaxis.titlefont.color='black'
    app_layout.yaxis.tickfont.color='black'
    app_layout.yaxis.type=correlmap.plottype
#    app_layout.yaxis.autorange=True

    if not plot_correl_options.noanno:
        app_layout.annotations=app_annotations
    app_xyline = go.Scatter(
        x=[app_min + 1,
            app_max],
        y=[app_min + 1,
            app_max],
            showlegend=False,mode="lines")
    app_xyline.line.color = xyline.line.color
    app_data.append(app_xyline)
    # plotly will only let you do .pdf if you pay for it - I have.
    # To get this to work for free change the extension to .png
    if image_type != "":
#        png_name = plotname.replace(".", "_") + "." + image_type
#        Figure(data=kernel_data,layout=png_layout).write_image(png_name, height=512.0*1.05, width=512)

        png_name = plotname.replace(".", "_") + ".per-app." + image_type
        go.Figure(data=app_data,layout=app_layout).write_image(
            png_name,
            height=height,
            width=width,
            # height=512.0*1.05,
            # width=512,
        )


    # This generates the html
    plotly.offline.plot(go.Figure(data=kernel_data,layout=png_layout), \
        filename= plotname + ".per-kernel.html", auto_open=False)

    plotly.offline.plot(go.Figure(data=app_data,layout=app_layout), \
        filename= plotname + ".per-app.html", auto_open=False)

In [51]:
correl_outdir = os.path.join(this_directory, "correl-html")
for (plotfile, hw_cfg), traces in fig_data.items():
    # pdf requires paid plotly account wtf
    make_submission_quality_image("pdf", traces, hw_cfg, correl_outdir,
                                  font_size=25, tick_font_size=20, width=1000, height=1000)
print("Output Available at: file://{0}".format(correl_outdir))

[(Scatter({
    'error_x': {'array': [115.20000000000073, 57.600000000000364,
                          38.400000000001455, 0.0, 76.79999999999745,
                          211.1999999999971, 0.0, 134.40000000000146,
                          38.40000000000009, 364.7999999999993, 19.200000000000728,
                          345.60000000000036, 19.200000000000728,
                          19.80000000000109, 38.39999999999873, 19.200000000000728],
                'arrayminus': [115.19999999999891, 57.600000000000364,
                               38.399999999999636, 0.0, 76.80000000000109,
                               211.20000000000073, 0.0, 134.39999999999964,
                               38.40000000000009, 364.8000000000011,
                               19.19999999999891, 345.59999999999854,
                               19.199999999999818, 19.80000000000109,
                               38.399999999999636, 19.199999999999818],
                'symmetric': False,
        


invalid value encountered in divide


invalid value encountered in divide



[(Scatter({
    'error_x': {'array': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                'arrayminus': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                'symmetric': False,
                'type': 'data',
                'visible': True},
    'mode': 'markers',
    'name': 'QV100-PTX',
    'text': [backprop-rodinia-2.0-ft/4096___data_result_4096_txt--
             _Z22bpnn_layerforward_CUDAPfS_S_S_ii (Err=-73.96%,HW-
             Range=+0.00%/-0.00%), backprop-
             rodinia-2.0-ft/4096___data_result_4096_txt--
             _Z24bpnn_adjust_weights_cudaPfiS_iS_S_ (Err=-79.02%,HW-
             Range=+0.00%/-0.00%), bfs-
             rodinia-2.0-ft/__data_graph4096_txt___data_graph4096_result_txt--
             _Z6KernelP4NodePiPbS2_S2_S1_i (Err=10245.52%,HW-Range=+0.00%/-0.00%),
             bfs-rodinia-2.0-ft/__data_graph4096_txt___data_graph4096_result_txt--
             _Z7Kernel2PbS_S_S_i (Err=99.22%,HW-Range=+0.00%/-0.00%), hotspot-
             