In [166]:
import numpy as np
import os
import glob
import csv
import re
import pandas as pd
from pprint import pprint
from pathlib import Path

In [167]:
# Root directory under which the parsed CSV files are looked up
# (the cells below read from root_path / "analyze/parsed/...").
# The location can be overridden with the GPGPUSIMS_ROOT environment variable
# so the notebook is not tied to one machine; when the variable is unset the
# original hard-coded path is used.
root_path = Path(os.environ.get("GPGPUSIMS_ROOT", "/home/roman/dev/gpgpusims"))

In [168]:
# Load every "bfs-rodinia-1080ti.csv.cycle*" file from the parsed directory
# and combine them into one frame of kernel launches.
parsed_dir = root_path / "analyze/parsed/"
hw_cycle_csvs = list(parsed_dir.glob("bfs-rodinia-1080ti.csv.cycle*"))
pprint(hw_cycle_csvs)

hw_cycle_df = (
    pd.concat(
        [pd.read_csv(cycle_csv) for cycle_csv in hw_cycle_csvs],
        ignore_index=False,  # each file keeps its own row labels
    )
    # keep only rows that carry a Correlation_ID (this removes the units row)
    .loc[lambda frame: frame["Correlation_ID"].notnull()]
    # discard the "[CUDA memcpy ...]" entries
    .loc[lambda frame: ~frame["Name"].str.contains(r"\[CUDA memcpy .*\]")]
    # every remaining row is a kernel launch, so "Name" becomes "Kernel"
    .rename(columns={"Name": "Kernel"})
    # these columns are only relevant for memcopies
    .drop(columns=["Size", "Throughput", "SrcMemType", "DstMemType"])
    # pin explicit dtypes for the timing, shared-memory and label columns
    .astype({
        "Start": "float64",
        "Duration": "float64",
        "Static SMem": "float64",
        "Dynamic SMem": "float64",
        "Device": "string",
        "Kernel": "string",
    })
)
print(hw_cycle_df.dtypes)
print(hw_cycle_df.shape)
hw_cycle_df

[PosixPath('/home/roman/dev/gpgpusims/analyze/parsed/bfs-rodinia-1080ti.csv.cycle.1'),
 PosixPath('/home/roman/dev/gpgpusims/analyze/parsed/bfs-rodinia-1080ti.csv.cycle.0')]
Start                   float64
Duration                float64
Grid X                  float64
Grid Y                  float64
Grid Z                  float64
Block X                 float64
Block Y                 float64
Block Z                 float64
Registers Per Thread    float64
Static SMem             float64
Dynamic SMem            float64
Device                   string
Context                 float64
Stream                  float64
Kernel                   string
Correlation_ID          float64
dtype: object
(32, 16)


Unnamed: 0,Start,Duration,Grid X,Grid Y,Grid Z,Block X,Block Y,Block Z,Registers Per Thread,Static SMem,Dynamic SMem,Device,Context,Stream,Kernel,Correlation_ID
8,326293.037,11.232,8.0,1.0,1.0,512.0,1.0,1.0,19.0,0.0,0.0,GeForce GTX 1080 Ti (0),1.0,7.0,_Z6KernelP4NodePiPbS2_S2_S1_i,124.0
9,326321.55,2.336,8.0,1.0,1.0,512.0,1.0,1.0,12.0,0.0,0.0,GeForce GTX 1080 Ti (0),1.0,7.0,_Z7Kernel2PbS_S_S_i,131.0
12,326364.464,9.633,8.0,1.0,1.0,512.0,1.0,1.0,19.0,0.0,0.0,GeForce GTX 1080 Ti (0),1.0,7.0,_Z6KernelP4NodePiPbS2_S2_S1_i,142.0
13,326376.657,2.176,8.0,1.0,1.0,512.0,1.0,1.0,12.0,0.0,0.0,GeForce GTX 1080 Ti (0),1.0,7.0,_Z7Kernel2PbS_S_S_i,149.0
16,326410.259,11.008,8.0,1.0,1.0,512.0,1.0,1.0,19.0,0.0,0.0,GeForce GTX 1080 Ti (0),1.0,7.0,_Z6KernelP4NodePiPbS2_S2_S1_i,160.0
17,326423.027,2.208,8.0,1.0,1.0,512.0,1.0,1.0,12.0,0.0,0.0,GeForce GTX 1080 Ti (0),1.0,7.0,_Z7Kernel2PbS_S_S_i,167.0
20,326454.197,13.249,8.0,1.0,1.0,512.0,1.0,1.0,19.0,0.0,0.0,GeForce GTX 1080 Ti (0),1.0,7.0,_Z6KernelP4NodePiPbS2_S2_S1_i,178.0
21,326469.174,2.272,8.0,1.0,1.0,512.0,1.0,1.0,12.0,0.0,0.0,GeForce GTX 1080 Ti (0),1.0,7.0,_Z7Kernel2PbS_S_S_i,185.0
24,326498.167,13.985,8.0,1.0,1.0,512.0,1.0,1.0,19.0,0.0,0.0,GeForce GTX 1080 Ti (0),1.0,7.0,_Z6KernelP4NodePiPbS2_S2_S1_i,196.0
25,326513.912,2.208,8.0,1.0,1.0,512.0,1.0,1.0,12.0,0.0,0.0,GeForce GTX 1080 Ti (0),1.0,7.0,_Z7Kernel2PbS_S_S_i,203.0


In [169]:
# Load the hardware statistics from bfs-rodinia-1080ti.csv, keeping only the
# rows that carry a Correlation_ID (this removes the units row).
hw_csv = root_path / "analyze/parsed/bfs-rodinia-1080ti.csv"
hw_df = pd.read_csv(hw_csv).loc[lambda frame: frame["Correlation_ID"].notnull()]
print(hw_df.shape)

# Show which column names the two hardware frames have in common.
pprint(set(hw_cycle_df.columns) & set(hw_df.columns))

# Inner-join the statistics with the cycle measurements. Correlation_ID is also
# common to both frames but is not part of the join key.
inner_hw_df = hw_df.merge(
    hw_cycle_df,
    how="inner",
    on=["Stream", "Context", "Device", "Kernel"],
)
print(inner_hw_df.shape)

# The merged frame must not contain a NaN in any column.
assert not inner_hw_df.isna().any().any()

(16, 125)
{'Stream', 'Context', 'Device', 'Kernel', 'Correlation_ID'}
(256, 137)


In [186]:
# Load the simulator statistics from bfs-rodinia-gpgpusim.csv and pivot them so
# that each (kernel, kernel_id) pair is one row and each "stat" is one column
# holding the corresponding "value".
sim_csv = root_path / "analyze/parsed/bfs-rodinia-gpgpusim.csv"
sim_df = pd.read_csv(sim_csv).pivot(
    index=["kernel", "kernel_id"],
    columns=["stat"],
)["value"]
print(sim_df.shape)
sim_df

(2, 23)


Unnamed: 0_level_0,stat,gpgpu_n_tot_w_icount,gpgpu_silicon_slowdown,gpgpu_simulation_rate,gpgpu_simulation_time_sec,gpu_ipc,gpu_occupancy,gpu_tot_ipc,gpu_tot_sim_cycle,gpu_total_instructions,k-count,...,l2_cache_read_total,l2_cache_write_hit,l2_cache_write_total,total_core_cache_read_hit,total_core_cache_read_mshr_hit,total_core_cache_read_total,total_core_cache_write_hit,total_core_cache_write_total,total_dram_reads,total_dram_writes
kernel,kernel_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
_Z6KernelP4NodePiPbS2_S2_S1_i,0,113730.0,1203826.0,1202.0,63.0,12.2776,16.3771,13.6259,74677.0,1027692.0,8.0,...,13735.0,15818.0,15818.0,56226.0,1630.0,71591.0,8302.0,15818.0,0.0,0.0
_Z7Kernel2PbS_S_S_i,0,24471.0,1198840.0,1207.0,36.0,13.0178,24.4543,13.5987,44871.0,598006.0,8.0,...,1024.0,2196.0,2196.0,0.0,0.0,1024.0,1050.0,2196.0,0.0,0.0


In [218]:
def correlate(sim_kernel_df, hw_kernel_df, duration_to_cycles=1200):
    """Compare the simulated cycle count of one kernel with its hardware runs.

    The hardware value is the average of ``hw_kernel_df["Duration"]`` scaled by
    ``duration_to_cycles``; its error bars are the distances from that average
    to the scaled maximum and minimum. The simulator value is the single
    ``gpu_tot_sim_cycle`` entry of ``sim_kernel_df``.

    Parameters
    ----------
    sim_kernel_df : pandas.DataFrame or mapping
        Simulator statistics; ``sim_kernel_df["gpu_tot_sim_cycle"]`` must hold
        exactly one value.
    hw_kernel_df : pandas.DataFrame or mapping
        Hardware measurements; ``hw_kernel_df["Duration"]`` must hold at least
        one value.
    duration_to_cycles : float, default 1200
        Factor applied to ``Duration`` to obtain cycles.
        NOTE(review): presumably a 1200 MHz clock with Duration in
        microseconds -- confirm against the profiler output.

    Returns
    -------
    list of dict
        One dict per correlated statistic (currently only "Cycles") with the
        keys ``name``, ``hw``, ``hw_error`` (a ``(plus, minus)`` tuple),
        ``sim`` and ``err`` (``sim - hw``). The list is also pretty-printed
        before ``err`` is added.

    Raises
    ------
    ValueError
        If there are no hardware durations, or if the simulator frame does not
        contain exactly one ``gpu_tot_sim_cycle`` value.
    """
    hw_duration = hw_kernel_df["Duration"]
    if len(hw_duration) == 0:
        raise ValueError("hw_kernel_df has no Duration values to correlate")

    # float() on a pandas Series is deprecated and fails outright for anything
    # but a single element, so extract the one expected value explicitly.
    sim_cycles = np.asarray(sim_kernel_df["gpu_tot_sim_cycle"], dtype=float).ravel()
    if sim_cycles.size != 1:
        raise ValueError(
            "expected exactly one gpu_tot_sim_cycle value, "
            f"got {sim_cycles.size}"
        )

    hw_mean = np.average(hw_duration) * duration_to_cycles
    hw_max = np.max(hw_duration) * duration_to_cycles
    hw_min = np.min(hw_duration) * duration_to_cycles

    correl_list = [
        dict(
            name="Cycles",
            hw=hw_mean,
            hw_error=(hw_max - hw_mean, hw_mean - hw_min),
            sim=float(sim_cycles[0]),
        ),
    ]
    pprint(correl_list)

    # The previous version indexed hw_array[-1] / sim_array[-1] without ever
    # appending to those lists, which always raised IndexError. Compute the
    # error directly from each entry instead.
    for corr in correl_list:
        corr["err"] = corr["sim"] - corr["hw"]
    return correl_list

In [219]:
# Walk the merged hardware frame one (Device, Kernel) group at a time and
# correlate each group with the simulator rows whose "kernel" index level has
# the same name.
for (device, kernel), hw_kernel_df in inner_hw_df.groupby(["Device", "Kernel"]):
    print(f"{device} {kernel}")

    sim_kernel_df = sim_df.loc[sim_df.index.get_level_values("kernel") == kernel]
    print(f"sim_kernel_df {sim_kernel_df.shape}")
    print(f"hw_kernel_df {hw_kernel_df.shape}")

    correlate(sim_kernel_df, hw_kernel_df)
    # The loop stops unconditionally here, so only the first group is processed.
    break

GeForce GTX 1080 Ti (0) _Z6KernelP4NodePiPbS2_S2_S1_i
sim_kernel_df (1, 23)
hw_kernel_df (128, 137)
kernel                         kernel_id
_Z6KernelP4NodePiPbS2_S2_S1_i  0            74677.0
Name: gpu_tot_sim_cycle, dtype: float64
[{'hw': 11779.724999999999,
  'hw_error': (5002.2750000000015, 8016.524999999998),
  'name': 'Cycles',
  'sim': 74677.0}]


In [188]:
class Correlation:
    """Holds a hardware data set together with a simulator data set.

    ``hw`` must support ``hw["Duration"]``; ``sim`` is only stored and is not
    read by any method defined on this class.
    """

    def __init__(self, hw, sim):
        self.hw, self.sim = hw, sim

    def hw_cycles(self):
        """Return the average of ``hw["Duration"]`` multiplied by 1200."""
        mean_duration = np.average(self.hw["Duration"])
        return mean_duration * 1200

# Declarative description of the "Cycles" correlation. The hw_eval, hw_error
# and sim_eval entries are Python source strings that other cells pass to
# eval() with `hw` / `sim` bound to the data being correlated; only use
# trusted, hand-written expressions here.
#
# The backslashes in sim_eval are written as "\\" so the stored text still
# contains a literal "\s" without relying on an invalid escape sequence
# (which newer Python versions warn about).
#
# NOTE(review): plotfile and hw_name refer to a TITAN V, whereas the CSV files
# loaded in this notebook are named "...-1080ti..." -- confirm the labels.
# NOTE(review): the sim_eval key "gpu_tot_sim_cycle\s*=\s*(.*)" is not one of
# the columns listed for sim_df in this notebook -- confirm before evaluating.
correl_list = [
    dict(
        chart_name="Cycles",
        plotfile="titanv-cycles",
        hw_eval='np.average(hw["Duration"])*1200',
        # evaluates to a (plus, minus) tuple: max - mean, mean - min
        hw_error=(
            'np.max(hw["Duration"])*1200 - np.average(hw["Duration"])*1200,'
            'np.average(hw["Duration"])*1200 - np.min(hw["Duration"])*1200'
        ),
        sim_eval='float(sim["gpu_tot_sim_cycle\\s*=\\s*(.*)"])',
        hw_name="TITAN V",
        drophwnumbelow=0,
        plottype="log",
        stattype="counter",
    ),
]


# Show every simulator statistic (column of sim_df) and the gpu_tot_sim_cycle
# value of each (kernel, kernel_id) row.
pprint(sim_df.columns.to_list())
print(sim_df["gpu_tot_sim_cycle"])
# Rebind correl_list to a single "Cycles" entry whose values are computed
# eagerly from inner_hw_df / sim_df instead of being stored as eval() strings.
# hw is the average Duration over ALL rows of inner_hw_df (not per kernel),
# multiplied by 1200; hw_error is (max - average, average - min) on that scale.
#
# NOTE(review): the recorded output of this cell ends with
# "TypeError: cannot convert the series to <class 'float'>". The printed
# sim_df["gpu_tot_sim_cycle"] holds two rows, and float() cannot convert a
# multi-element Series. The correlate() function performs the same computation
# on per-kernel slices; decide whether this cell should aggregate across
# kernels or be removed.
correl_list = [
    dict(
        name="Cycles",
        # the outer np.average receives a scalar and returns it unchanged
        hw=np.average(np.average(inner_hw_df["Duration"])*1200),
        hw_error=(
            np.max(inner_hw_df["Duration"])*1200 - np.average(inner_hw_df["Duration"])*1200,
            np.average(inner_hw_df["Duration"])*1200 - np.min(inner_hw_df["Duration"])*1200
        ),
        sim=float(sim_df["gpu_tot_sim_cycle"]),
    ),
]
pprint(correl_list)

['gpgpu_n_tot_w_icount',
 'gpgpu_silicon_slowdown',
 'gpgpu_simulation_rate',
 'gpgpu_simulation_time_sec',
 'gpu_ipc',
 'gpu_occupancy',
 'gpu_tot_ipc',
 'gpu_tot_sim_cycle',
 'gpu_total_instructions',
 'k-count',
 'kernel_launch_uid',
 'l2_bandwidth_gbps',
 'l2_cache_read_hit',
 'l2_cache_read_total',
 'l2_cache_write_hit',
 'l2_cache_write_total',
 'total_core_cache_read_hit',
 'total_core_cache_read_mshr_hit',
 'total_core_cache_read_total',
 'total_core_cache_write_hit',
 'total_core_cache_write_total',
 'total_dram_reads',
 'total_dram_writes']
kernel                         kernel_id
_Z6KernelP4NodePiPbS2_S2_S1_i  0            74677.0
_Z7Kernel2PbS_S_S_i            0            44871.0
Name: gpu_tot_sim_cycle, dtype: float64


TypeError: cannot convert the series to <class 'float'>

In [172]:
import copy
import sys

# Partial port of the reference correlation loop shown in the last cell of this
# notebook: for every entry of correl_list, evaluate its "hw_eval" expression
# against rows of hw_df. Only the hardware-evaluation step has been ported.
#
# NOTE(review): this loop reads correl['hw_eval'], so it needs the declarative
# correl_list (the one holding eval() strings), not the eagerly computed one.
for correl in correl_list:
    # Per-statistic accumulators. Only hw_array is written below; the others
    # are initialised but not used within this cell.
    hw_array = []
    hw_error = []
    hw_error_min = []
    sim_array = []
    label_array = []
    color_array = []

    appcount = 0
    kernelcount = 0
    num_less_than_one_percent = 0
    num_less_than_ten_percent = 0
    num_under = 0
    num_over = 0
    errs = []
    # sim_appargs_leftover = set(copy.deepcopy(list(sim_for_cfg.keys())))
    # hw_appargs_leftover = set(copy.deepcopy(list(hw_data[hw_cfg].keys())))
    max_axis_val = 0.0
    min_axis_val = 99999999999999999999999999999.9
    err_dropped_stats = 0
    hw_low_drop_stats = 0
    apps_included = {}
    
    hw_klist = hw_df
    sim_klist = sim_df
    # NOTE(review): this raises when sim has no more rows than hw, with a
    # message that says a match was found. The reference loop in the last cell
    # treats the same condition (len(sim_klist) <= len(hw_klist)) as the
    # success case and only logs a mismatch otherwise -- the test looks
    # inverted; confirm.
    if len(sim_klist) <= len(hw_klist):
        raise ValueError(f"Found hw/sim match. Sim={len(sim_klist)}. HW={len(hw_klist)}")
    
    count = 0
    # NOTE(review): sim_klist is a DataFrame, and iterating a DataFrame yields
    # its column labels, not its rows; `sim` is also never read in this loop.
    for sim in sim_klist:
        # `hw` is the row of hw_df at position `count`.
        hw = hw_klist.iloc[count]
        pprint(hw)
        # pprint(hw.columns)
        # NOTE(review): the recorded output ends with "KeyError: 'Duration'",
        # apparently raised by this lookup -- the printed hw_df row shows no
        # "Duration" field (that column comes from the *.cycle* files).
        print(hw.loc["Duration"])
        try:
            print(f"Evaluating HW: {correl['hw_eval']}")
            # eval() runs the expression string with `hw` (and `np`) in scope.
            hw_array.append(eval(correl['hw_eval']))
        except:
            # Bare except: every exception (including KeyboardInterrupt) is
            # reported as a possibly uncollected stat and the row is skipped.
            e = sys.exc_info()[0]
            print(f"Potentially uncollected stat in {correl['hw_eval']}.Error: {e}")
            count += 1
            continue
        # NOTE(review): `count` is only advanced in the except path above, so
        # after a successful evaluation the next iteration reads the same
        # hw_df row again; the reference loop increments it on success too.

Device                                 GeForce GTX 1080 Ti (0)
Context                                                    1.0
Stream                                                     7.0
Kernel                           _Z6KernelP4NodePiPbS2_S2_S1_i
Correlation_ID                                           123.0
                                             ...              
dram_write_throughput                                 0.000000
dram_utilization                                      Idle (0)
half_precision_fu_utilization                         Idle (0)
ecc_transactions                                           0.0
ecc_throughput                                        0.000000
Name: 1, Length: 125, dtype: object


KeyError: 'Duration'

In [None]:
# Reference correlation/plotting loop. For every entry of correl_list it pairs
# simulator kernels with hardware kernels per application, evaluates the
# entry's hw/sim expressions, gathers error statistics, and stores a scatter
# trace plus layout in fig_data.
#
# NOTE(review): this cell has no execution count and cannot run as-is in this
# notebook. It uses names that no visible cell defines (hw_cfg, cfg, logger,
# sim_for_cfg, hw_data, isAppBanned, blacklist, options, fig_data, go, Layout)
# and calls `numpy.corrcoef` although numpy is imported only as `np`. It also
# reads correl.hw_name / correl.hw_eval / ... as attributes, whereas the
# correl_list entries built in this notebook are dicts.
for correl in correl_list:
        # Skip correlations that are restricted to a different hardware config.
        if correl.hw_name != "all" and correl.hw_name not in hw_cfg:
            logger.log("for cfg:{0}, hw_cfg:{1} - Skipping plot:\n{2}".format(cfg, hw_cfg, correl))
            continue

        # Parallel per-kernel lists: hardware value, its +/- error, simulator
        # value and hover label.
        hw_array = []
        hw_error = []
        hw_error_min = []
        sim_array = []
        label_array = []
        color_array = []

        appcount = 0
        kernelcount = 0
        num_less_than_one_percent = 0
        num_less_than_ten_percent = 0
        num_under = 0
        num_over = 0
        errs = []
        # Apps start out as "leftover" on both sides and are removed once matched.
        sim_appargs_leftover = set(copy.deepcopy(list(sim_for_cfg.keys())))
        hw_appargs_leftover = set(copy.deepcopy(list(hw_data[hw_cfg].keys())))
        max_axis_val = 0.0
        min_axis_val = 99999999999999999999999999999.9
        err_dropped_stats = 0
        hw_low_drop_stats = 0
        apps_included = {}
        for appargs,sim_klist in sim_for_cfg.items():
            if appargs in hw_data[hw_cfg]:
                if (isAppBanned( appargs, blacklist )):
                    continue

                hw_klist = hw_data[hw_cfg][appargs]
                processAnyKernels = False
                # Kernels are paired by position, so the simulator must not
                # have more kernels than the hardware run.
                if len(sim_klist) <= len(hw_klist):
                    logger.log("Found hw/sim match for {0}. Sim={1}. HW={2}"
                        .format(appargs, len(sim_klist), len(hw_klist)))
                    sim_appargs_leftover.remove(appargs)
                    hw_appargs_leftover.remove(appargs)
                    count = 0
                    for sim in sim_klist:
                        # `hw` and `sim` are the names the eval() expressions refer to.
                        hw = hw_klist[count]
                        try:
                            logger.log("Evaluating HW: {0}".format(correl.hw_eval))
                            hw_array.append(eval(correl.hw_eval))
                        except:
                            # Bare except: any failure is treated as a missing stat.
                            e = sys.exc_info()[0]
                            logger.log("Potentially uncollected stat in {0}.Error: {1}".format(correl.hw_eval, e))
#                            print hw
#                            exit(1)
                            count += 1
                            continue

                        # Drop hardware values below the configured threshold.
                        if hw_array[-1] < correl.drophwnumbelow:
                            hw_low_drop_stats += 1
                            hw_array = hw_array[:-1]
                            count += 1
                            continue

                        # On a simulator-side failure the hardware value that was
                        # just appended is removed again to keep the lists aligned.
                        try:
                            sim_array.append(eval(correl.sim_eval))
                        except KeyError as e:
                            logger.log("Potentially uncollected stat in {0}.Error: {1}".format(correl.sim_eval, e))
                            hw_array = hw_array[:-1]
                            count += 1
                            continue
                        except ZeroDivisionError as e:
                           logger.log("Division by zerofor  stat in {0}.Error: {1}".format(correl.sim_eval, e))
                           count += 1
                           hw_array = hw_array[:-1]
                           continue
 
                        # hw_error evaluates to a (plus, minus) pair; default to 0/0.
                        if correl.hw_error != None:
                            maxe,mine = eval(correl.hw_error)
                            hw_error.append(maxe)
                            hw_error_min.append(mine)
                        else:
                            hw_error.append(0)
                            hw_error_min.append(0)


                        if appargs not in apps_included:
                            apps_included[appargs] = [];

                        processAnyKernels = True
                        err = 99999
                        hw_high = 0
                        hw_low = 999999999999

                        kernelcount += 1
                        err = sim_array[-1] - hw_array[-1]

                        # Express the error and the hardware range as a percentage
                        # of the hardware value; a zero hardware value yields 0.
                        if hw_array[-1] != 0:
                            hw_high = (hw_error[-1]/hw_array[-1]) * 100
                            hw_low = (hw_error_min[-1]/hw_array[-1]) * 100
                            err = (err / hw_array[-1]) * 100
                        else:
                            hw_high = 0
                            hw_low = 0
                            err = 0


                        # The <10% counter is independent of the buckets below.
                        if abs(err) < 10.0:
                            num_less_than_ten_percent += 1

                        # Exactly one of: <1%, over-estimate, under-estimate.
                        if abs(err) < 1.0:
                            num_less_than_one_percent += 1

                        elif err > 0:
                            num_over += 1
                        else:
                            num_under += 1

                        errs.append(abs(err))
                        apps_included[appargs].append((err, sim_klist[count]["Kernel"]))

                        label_array.append((appargs + "--" + sim_klist[count]["Kernel"]) +
                            " (Err={0:.2f}%,HW-Range=+{1:.2f}%/-{2:.2f}%)".format(err, hw_high,hw_low))
                        count += 1
                        # Track the overall min/max so both plot axes share one range.
                        if hw_array[-1] > max_axis_val:
                            max_axis_val = hw_array[-1]
                        if sim_array[-1] > max_axis_val:
                            max_axis_val = sim_array[-1]

                        if hw_array[-1] < min_axis_val:
                            min_axis_val = hw_array[-1]
                        if sim_array[-1] < min_axis_val:
                            min_axis_val = sim_array[-1]

                else:
                    logger.log("For appargs={0}, HW/SW kernels do not match HW={1}, SIM={2} and software has more than hardware\n"\
                        .format(appargs, len(hw_klist), len(sim_klist)))
                if processAnyKernels:
                    appcount += 1
        logger.log("Sim apps no HW:\n{0}\nHW apps no sim data:\n{1}"\
            .format(sim_appargs_leftover, hw_appargs_leftover))

        # Nothing was correlated for this entry: no plot.
        if len(errs) == 0:
            continue

        # NOTE(review): hw_high / hw_low computed in this loop are not read
        # again anywhere below in this cell.
        for i in range(len(hw_array)):
            if hw_array[i] != 0:
                hw_high = (hw_error[i]/hw_array[i]) * 100
                hw_low = (hw_error_min[i]/hw_array[i]) * 100
            else:
                hw_high = 0
                hw_low = 0

        # Correlation coefficient between hardware and simulator values, and
        # the mean of the absolute percentage errors.
        correl_co = numpy.corrcoef(hw_array, sim_array)[0][1]
        avg_err = 0
        for err in errs:
            avg_err += err
        avg_err = avg_err / len(errs)

        # Scatter of hardware (x) against simulation (y) with asymmetric
        # horizontal error bars taken from the hardware range.
        trace = go.Scatter(
            x = hw_array,
            y = sim_array,
            mode = 'markers',
            text=label_array,
            error_x=dict(
                type='data',
                symmetric=False,
                array=hw_error,
                arrayminus=hw_error_min,
                visible=True
            ),
            name=cfg,
        )
        # Both templates receive the same arguments; the last one
        # (num_less_than_ten_percent) is not referenced by either template, and
        # the second omits the average error.
        if not options.err_off:
            anno = cfg + " ({0} apps, {1} kernels ({6} < 1% Err, {4} under, {5} over)) [Correl={2:.4} Err={3:.2f}%]"\
                .format(appcount, kernelcount,correl_co, avg_err,num_under,num_over,num_less_than_one_percent, num_less_than_ten_percent)
        else:
            anno = cfg + " ({0} apps, {1} kernels ({6} < 1% Err, {4} under, {5} over)) [Correl={2:.4}]"\
                .format(appcount, kernelcount,correl_co, avg_err,num_under,num_over,num_less_than_one_percent, num_less_than_ten_percent)

        # Both axes use the same range, padded by 10% on either side.
        layout = Layout(
            title=correl.chart_name,
             xaxis=dict(
                title='Hardware {1}'.format(hw_cfg, correl.chart_name),
                range=[min_axis_val * 0.9 ,max_axis_val*1.1]
            ),
            yaxis=dict(
                title='Simulation {0}'.format(correl.chart_name),
                range=[min_axis_val * 0.9 ,max_axis_val*1.1]
            ),
        )

        data = [trace]

        # Collect the traces per (plotfile, hardware config) pair.
        if (correl.plotfile, hw_cfg) not in fig_data:
            fig_data[ (correl.plotfile, hw_cfg) ] = []
        fig_data[ (correl.plotfile, hw_cfg) ].append((trace, layout, cfg, anno, correl.plotfile, err_dropped_stats, apps_included, correl, hw_low_drop_stats))