In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
import pandas as pd
import numpy as np
from copy import copy
from pprint import pprint
from plot import *

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import itertools
import abc
import os
import sys
import re

sys.path.append("../")
import gpusims
import gpusims.plot.metrics as metric
from gpusims.plot.data import PlotData
from gpusims.config import Config, parse_configs
from gpusims.bench import parse_benchmarks

In [3]:
# Raise the display limits so large frames are shown (up to 700 rows/columns).
pd.set_option('display.max_rows', 700)
pd.set_option('display.max_columns', 700)
# Turn every numpy floating-point error category into an exception.
# np.seterr returns the previous settings, which is what this cell displays.
np.seterr(all='raise')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [4]:
# Root of the gpgpusims checkout. Set the GPGPUSIMS_ROOT environment variable to
# point at another checkout; the fallback is the original hard-coded location.
root_dir = Path(os.environ.get("GPGPUSIMS_ROOT", "/Users/roman/dev/gpgpusims"))
benchmark_dir = root_dir / "benchmarks"
run_dir = root_dir / "run"
# Fail early, and say which directory is missing.
assert benchmark_dir.is_dir(), f"benchmark directory not found: {benchmark_dir}"
assert run_dir.is_dir(), f"run directory not found: {run_dir}"

In [5]:
# Shallow copy of the simulator registry (simulator key -> benchmark-config class),
# so changes made to this dict do not modify gpusims.SIMULATORS itself.
simulators = copy(gpusims.SIMULATORS)
# Hardware configs and benchmark definitions, parsed from the YAML files
# under benchmark_dir.
configs = parse_configs(benchmark_dir / "configs" / "configs.yml")
benchmarks = parse_benchmarks(benchmark_dir / "benchmarks.yml")

# Print what was loaded as a quick sanity check.
pprint(simulators)
pprint(configs)
pprint(benchmarks)

{'accelsim-ptx': <class 'gpusims.accelsim.AccelSimPTXBenchmarkConfig'>,
 'accelsim-sass': <class 'gpusims.accelsim_sass.AccelSimSASSBenchmarkConfig'>,
 'm2s': <class 'gpusims.multi2sim.Multi2SimBenchmarkConfig'>,
 'macsim': <class 'gpusims.macsim.MacSimBenchmarkConfig'>,
 'native': <class 'gpusims.native.NativeBenchmarkConfig'>,
 'tejas': <class 'gpusims.tejas.TejasBenchmarkConfig'>}
{'sm6_gtx1080': Config(key='sm6_gtx1080', name='GTX 1080', path=PosixPath('/Users/roman/dev/gpgpusims/benchmarks/configs/SM6_GTX1080'), spec={'sm_count': 20, 'clock_speed': 1607}),
 'sm86_a4000': Config(key='sm86_a4000', name='A4000', path=PosixPath('/Users/roman/dev/gpgpusims/benchmarks/configs/SM86_A4000'), spec={'sm_count': 48, 'clock_speed': 735}),
 'sm86_rtx3070': Config(key='sm86_rtx3070', name='RTX 3070', path=PosixPath('/Users/roman/dev/gpgpusims/benchmarks/configs/SM86_RTX3070'), spec={'sm_count': 46, 'clock_speed': 1132})}
{'babelstream': Benchmark(/Users/roman/dev/gpgpusims/benchmarks/BabelStrea

In [6]:
def plot_bars(
    selected_simulators, data, config,
    title=None, fontsize=20, title_fontsize=25, font_family="Helvetica", round_to=1,
    ylabel=None, log=False, sort=False,
):
    """Build a grouped bar chart with one bar trace per simulator.

    Args:
        selected_simulators: simulator keys to plot, in trace order. When
            ``sort`` is true this is replaced by the order in which the
            simulators appear in the value-sorted data.
        data: DataFrame with "Simulator", "Benchmark" and "Value" columns.
        config: object whose ``name`` is appended to the label of the native
            hardware trace. Only read when gpusims.NATIVE is plotted.
        title: optional figure title.
        fontsize: base font size; x tick labels use 0.8 times this size.
        title_fontsize: font size of the title.
        font_family: font family for the figure and the hover labels.
        round_to: rounding passed to ``human_format`` for the bar labels.
        ylabel: y axis title.
        log: use a logarithmic y axis with ticks from ``powers_of(10, ...)``.
        sort: sort the rows by ascending "Value" before plotting.

    Returns:
        A plotly ``go.Figure``. Simulators without any rows in ``data`` are
        skipped instead of raising ``KeyError``.
    """
    if sort:
        # Stable sort, so rows with equal values keep their original order.
        data = data.sort_values(by="Value", kind="mergesort")

    data = data.set_index(["Simulator"])
    data["ValueStr"] = data["Value"].apply(lambda v: human_format(v, round_to=round_to))
    if sort:
        selected_simulators = data.index.get_level_values("Simulator").unique().tolist()

    available = set(data.index.unique())
    bars = []
    all_values = []
    for sim in selected_simulators:
        if sim not in available:
            # nothing was measured for this simulator
            continue

        # add hardware GPU name
        name = sim
        if sim in SIM_NAME:
            name = SIM_NAME[sim]
        if sim == gpusims.NATIVE:
            name += f" ({config.name})"

        sim_data = data.loc[[sim]]
        all_values.append(sim_data["Value"])

        bars.append(go.Bar(
            x=sim_data["Benchmark"],
            y=sim_data["Value"],
            text=sim_data["ValueStr"],
            textposition='auto',
            textangle=0,
            textfont=dict(
                size=fontsize,
                color="black",
            ),
            hovertemplate=(
                "<b>%{x}</b><br>" +
                "%{y:.2f}<br>"
            ),
            name=str(name),
            marker=sim_marker(sim),
        ))

    # Largest plotted value; None when no simulator contributed a bar.
    y_max = pd.concat(all_values).max() if all_values else None

    layout = go.Layout(
        font_family=font_family,
        font_color="black",
        font_size=fontsize,
        yaxis=go.layout.YAxis(
            title=ylabel,
            gridcolor="gray",
            zerolinecolor="gray",
        ),
        xaxis=go.layout.XAxis(
            tickfont=dict(
                size=0.8 * fontsize,
            ),
            dividerwidth=0,
            dividercolor="white",
        ),
        hoverlabel=dict(
            bgcolor="white",
            font_size=fontsize,
            font_family=font_family,
        ),
        barmode="group",
        bargroupgap=0.1,
        bargap=0.25,
        showlegend=True,
        **DEFAULT_LAYOUT_OPTIONS,
    )
    if title is not None:
        layout.title = dict(
            text=title,
            font=go.layout.title.Font(size=title_fontsize),
            x=0.5,
            y=0.95,
            xanchor="center",
            yanchor="top",
        )
    if log:
        if y_max is not None:
            # explicit ticks at powers of ten, labelled in human-readable form
            y_ticks = powers_of(10, including=y_max)
            layout.yaxis.tickmode = "array"
            layout.yaxis.tickvals = y_ticks
            layout.yaxis.ticktext = [human_format(t, round_to=0) for t in y_ticks]
        layout.yaxis.type = "log"
    return go.Figure(data=bars, layout=layout)

In [7]:
def plot_bars_exec_time(
    selected_simulators, data, config,
    title=None, fontsize=20, title_fontsize=25, font_family="Helvetica", round_to=1,
    ylabel=None, log=False,
):
    """Build a stacked bar chart of trace time plus simulation time.

    For every simulator two stacked traces are added per benchmark: the
    "Trace" rows at the bottom and the "Sim" rows on top, the latter labelled
    with the total (trace + sim) time.

    Args:
        selected_simulators: simulator keys to plot. Each key must exist in
            SIM_NAME and SIM_ABBR.
        data: DataFrame with "Simulator", "Kind" ("Trace" or "Sim"),
            "Benchmark" and "Value" columns.
        config: object whose ``name`` is appended to the native hardware label.
        title: optional figure title; also enlarges the top margin.
        fontsize: base font size; x tick labels use 0.8 times this size.
        title_fontsize: font size of the title.
        font_family: font family for the figure and the hover labels.
        round_to: decimals used to round the values of all non-native simulators.
        ylabel: y axis title.
        log: use a logarithmic y axis.

    Returns:
        A plotly ``go.Figure``. Simulators whose total time sums to zero are
        skipped; if every simulator is skipped the figure has no traces.

    Raises:
        AssertionError: if the plotted simulators do not share the same
            benchmark sequence.
    """
    data = data.set_index(["Simulator", "Kind"])

    def stacked_bar(sim, name, x, y, text, **extra):
        # Both halves of a stack share the simulator's colour and pattern and
        # are separated by a white outline.
        marker = sim_marker(sim)
        return go.Bar(
            x=x,
            y=y,
            text=text,
            textposition='auto',
            textangle=0,
            textfont=dict(
                size=fontsize,
                color="black",
            ),
            hovertemplate=(
                "<b>%{x}</b><br>" +
                "%{y:.2f}<br>"
            ),
            name=str(name),
            marker=go.bar.Marker(
                color=marker.color,
                pattern=marker.pattern,
                line=dict(
                    color="white",
                    width=2,
                ),
            ),
            **extra,
        )

    bars = []
    all_benchmarks = None
    for sim in selected_simulators:
        # add hardware GPU name
        name = SIM_NAME[sim]
        if sim == gpusims.NATIVE:
            name += f" ({config.name})"

        # trace and sim time
        trace_time = data.loc[data.index == (sim, "Trace")]
        sim_time = data.loc[data.index == (sim, "Sim")]
        if sim != gpusims.NATIVE:
            trace_time = trace_time.round(round_to)
            sim_time = sim_time.round(round_to)

        # total time (trace and sim)
        total_time = trace_time["Value"].values + sim_time["Value"].values
        if sim != gpusims.NATIVE:
            total_time = total_time.round(round_to)

        if total_time.sum() == 0:
            # skip simulator
            continue

        benchmarks = trace_time["Benchmark"].values
        if all_benchmarks is None:
            all_benchmarks = benchmarks

        # need same benches for each simulator (array_equal also copes with
        # sequences of different length)
        assert np.array_equal(benchmarks, all_benchmarks), (
            f"benchmarks of {sim} differ from those of the other simulators"
        )

        # two-level category axis: benchmark first, simulator abbreviation second
        x = [benchmarks, [SIM_ABBR[sim]] * len(benchmarks)]
        trace_values = trace_time["Value"]

        bars.append(stacked_bar(
            sim, name, x,
            y=trace_values,
            text=trace_values.apply(lambda v: f"trace<br>{v}"),
            showlegend=False,
        ))
        bars.append(stacked_bar(
            sim, name, x,
            y=sim_time["Value"],  # will be stacked
            text=total_time,
        ))

    if all_benchmarks is not None:
        # add empty separator between the benchmark groups
        x = [all_benchmarks, [""] * len(all_benchmarks)]
        y = [0] * len([item for sublist in x for item in sublist])
        bars.append(go.Bar(
            x=x,
            y=y,
            showlegend=False,
        ))

    layout = go.Layout(
        font_family=font_family,
        font_color="black",
        font_size=fontsize,
        yaxis=go.layout.YAxis(
            title=ylabel,
            gridcolor="gray",
            zerolinecolor="gray",
        ),
        xaxis=go.layout.XAxis(
            tickfont=dict(
                size=0.8 * fontsize,
            ),
            dividerwidth=0,
            dividercolor="white",
        ),
        hoverlabel=dict(
            bgcolor="white",
            font_size=fontsize,
            font_family=font_family,
        ),
        barmode="stack",
        bargap=0.1,
        showlegend=True,
        **DEFAULT_LAYOUT_OPTIONS,
    )
    if title is not None:
        layout.margin.t = 2*MARGIN
        layout.title = dict(
            text=title,
            font=go.layout.title.Font(size=title_fontsize),
            x=0.5,
            y=0.95,
            xanchor="center",
            yanchor="top",
        )
    if log:
        layout.yaxis.type = "log"
    return go.Figure(data=bars, layout=layout)

In [8]:
def plot_mean_slowdown_per_config(selected_simulators, df, config_order=None, sort=False, config=None):
    """Plot the mean slowdown of each simulator relative to native hardware, per config.

    The values of ``df`` are summed per (Simulator, Benchmark, Config), which
    adds up trace and simulation time. Each simulator's sum is divided by the
    native hardware sum for the same benchmark and config, and the ratios are
    averaged per (Simulator, Config).

    Args:
        selected_simulators: simulator keys to consider; gpusims.NATIVE and
            simulators without any comparable rows are not plotted.
        df: DataFrame with "Simulator", "Benchmark", "Config" and "Value" columns.
        config_order: optional list of config names that fixes the order of
            the configs on the x axis.
        sort: forwarded to ``plot_bars``.
        config: forwarded to ``plot_bars``. It is only read there when the
            native hardware itself is plotted, which never happens here, so
            the default of None is sufficient.

    Returns:
        ``(final_slowdown_df, fig)``: the per simulator/config mean slowdown
        (configs are stored in the "Benchmark" column) and the figure.

    Raises:
        ValueError: if no slowdown could be computed for any selected simulator.
    """
    # keep only the needed columns (drops "Kind"), then sum sim and trace times
    slowdown_df = df[["Simulator", "Benchmark", "Config", "Value"]]
    slowdown_df = slowdown_df.groupby([
        "Simulator", "Benchmark", "Config"]).sum().reset_index()
    slowdown_df = slowdown_df[slowdown_df["Value"] > 0]

    hw = slowdown_df[slowdown_df["Simulator"] == gpusims.NATIVE]
    hw = hw[["Benchmark", "Config", "Value"]]
    hw = hw.rename(columns={"Value": "HardwareValue"})

    per_sim_slowdown = []
    for sim in selected_simulators:
        if sim == gpusims.NATIVE:
            continue

        sim_df = slowdown_df[slowdown_df["Simulator"] == sim]
        sim_df = sim_df.rename(columns={"Value": "SimValue"})
        # inner join: only benchmarks measured on both simulator and hardware
        sim_df = sim_df.merge(hw, how="inner", on=["Benchmark", "Config"])
        sim_df["Value"] = sim_df["SimValue"] / sim_df["HardwareValue"]
        sim_df = sim_df[["Simulator", "Config", "Value"]]
        sim_df = sim_df.groupby(["Simulator", "Config"]).mean()
        sim_df = sim_df.reset_index()
        per_sim_slowdown.append(sim_df)

    if not per_sim_slowdown or all(len(part) == 0 for part in per_sim_slowdown):
        raise ValueError("no slowdown data for any of the selected simulators")

    final_slowdown_df = pd.concat(per_sim_slowdown)
    if config_order is not None:
        final_slowdown_df["ConfigIdx"] = final_slowdown_df["Config"].apply(lambda c: config_order.index(c))
        final_slowdown_df = final_slowdown_df.sort_values(by=["ConfigIdx"])
    # plot_bars groups by the "Benchmark" column, so configs go on the x axis
    final_slowdown_df = final_slowdown_df.rename(columns={"Config": "Benchmark"})

    # plot_bars looks every selected simulator up in the data, so only pass
    # the ones that actually have rows
    with_data = set(final_slowdown_df["Simulator"])
    fig = plot_bars(
        selected_simulators=[s for s in selected_simulators if s in with_data],
        data=final_slowdown_df,
        config=config,
        ylabel="Slowdown factor",
        # skip titles, we use captions to save space
        fontsize=35,
        log=True,
        sort=sort,
    )
    fig.update_layout(width=1500, height=400)
    return final_slowdown_df, fig

def plot_mean_slowdown(selected_simulators, df, config_order=None, sort=False, config=None):
    """Plot the mean slowdown of each simulator relative to native hardware.

    The values of ``df`` are summed per (Simulator, Benchmark), which adds up
    trace and simulation time and all configs. Each simulator's sum is divided
    by the native hardware sum for the same benchmark, and the ratios are
    averaged per simulator.

    Args:
        selected_simulators: simulator keys to consider; gpusims.NATIVE and
            simulators without any comparable rows are not plotted.
        df: DataFrame with "Simulator", "Benchmark" and "Value" columns. Any
            other column (such as "Kind" or "Config") is ignored.
        config_order: accepted for signature parity with
            ``plot_mean_slowdown_per_config``; not used.
        sort: forwarded to ``plot_bars``.
        config: forwarded to ``plot_bars``. It is only read there when the
            native hardware itself is plotted, which never happens here, so
            the default of None is sufficient.

    Returns:
        ``(final_slowdown_df, fig)``: one mean slowdown per simulator (with an
        empty "Benchmark" label) and the figure.

    Raises:
        ValueError: if no slowdown could be computed for any selected simulator.
    """
    # keep only the needed columns, then sum over sim/trace times and configs
    slowdown_df = df[["Simulator", "Benchmark", "Value"]]
    slowdown_df = slowdown_df.groupby([
        "Simulator", "Benchmark"]).sum().reset_index()
    slowdown_df = slowdown_df[slowdown_df["Value"] > 0]

    hw = slowdown_df[slowdown_df["Simulator"] == gpusims.NATIVE]
    hw = hw[["Benchmark", "Value"]]
    hw = hw.rename(columns={"Value": "HardwareValue"})

    per_sim_slowdown = []
    for sim in selected_simulators:
        if sim == gpusims.NATIVE:
            continue

        sim_df = slowdown_df[slowdown_df["Simulator"] == sim]
        sim_df = sim_df.rename(columns={"Value": "SimValue"})
        # inner join: only benchmarks measured on both simulator and hardware
        sim_df = sim_df.merge(hw, how="inner", on=["Benchmark"])
        sim_df["Value"] = sim_df["SimValue"] / sim_df["HardwareValue"]
        sim_df = sim_df[["Simulator", "Value"]]
        sim_df = sim_df.groupby(["Simulator"]).mean()
        sim_df = sim_df.reset_index()
        per_sim_slowdown.append(sim_df)

    if not per_sim_slowdown or all(len(part) == 0 for part in per_sim_slowdown):
        raise ValueError("no slowdown data for any of the selected simulators")

    final_slowdown_df = pd.concat(per_sim_slowdown)
    # single unlabeled group on the x axis
    final_slowdown_df["Benchmark"] = ""

    # plot_bars looks every selected simulator up in the data, so only pass
    # the ones that actually have rows
    with_data = set(final_slowdown_df["Simulator"])
    fig = plot_bars(
        selected_simulators=[s for s in selected_simulators if s in with_data],
        data=final_slowdown_df,
        config=config,
        ylabel="Slowdown factor",
        # skip titles, we use captions to save space
        fontsize=35,
        log=True,
        sort=sort,
    )
    fig.update_layout(width=1500, height=400)
    return final_slowdown_df, fig

# Disabled example call of plot_mean_slowdown. It reads selected_simulators,
# selected_configs and all_benches_metric_df, none of which is defined in the
# cells above this one, so it stays switched off here.
if False:
    final_slowdown_df, fig = plot_mean_slowdown(
        selected_simulators=[s for s in selected_simulators if s != gpusims.NATIVE],
        df=all_benches_metric_df,
        config_order=[configs[c].name for c in selected_configs],
    )
    fig.show()
# final_slowdown_df

In [9]:
def plot_subplots(
    selected_simulators, selected_configs, per_config_data,
    title=None,
    fontsize=9, title_fontsize=9, bar_fontsize=9, legend_fontsize=7, tick_fontsize=9,
    font_family="Helvetica", round_to=1,
    bargroupgap=0.1, bargap=0.1, barmode="group",
    ylabel=None, log=False,
    textangle=90,
    scale = 2
):
    """Build one row of bar-chart subplots, one column per hardware config.

    Args:
        selected_simulators: simulator keys to plot in every subplot.
        selected_configs: config keys, one subplot column each (in this order).
            Config names are looked up in the notebook-level ``configs`` dict.
        per_config_data: dict mapping config key to a DataFrame with
            "Simulator", "Benchmark" and "Value" columns. Configs without an
            entry keep an empty, untitled column.
        title: optional figure title; also enlarges the top margin.
        fontsize, title_fontsize, bar_fontsize, legend_fontsize, tick_fontsize:
            font sizes, each multiplied by ``scale``. The subplot titles use
            ``legend_fontsize``.
        font_family: font family for the figure and the hover labels.
        round_to: rounding passed to ``human_format`` for the bar labels.
        bargroupgap, bargap, barmode: forwarded to the plotly layout.
        ylabel: y axis title.
        log: use logarithmic y axes.
        textangle: rotation of the bar labels.
        scale: factor applied to all font sizes and to the figure size.

    Returns:
        A plotly ``go.Figure`` sized ``8 x 1.21`` inches times ``PPI * scale``.

    Raises:
        ValueError: if there is nothing to plot for any config/simulator.
    """
    bars = []
    subplot_titles = []
    all_values = []
    sims_in_legend = set()
    for ci, config_key in enumerate(selected_configs, start=1):
        if config_key not in per_config_data:
            # keep titles aligned with their columns: an empty title is the
            # placeholder make_subplots itself uses for untitled subplots
            subplot_titles.append("")
            continue

        config = configs[config_key]
        subplot_titles.append(config.name)
        data = per_config_data[config_key]
        data = data.set_index(["Simulator"])
        data["ValueStr"] = data["Value"].apply(lambda v: human_format(v, round_to=round_to))

        available = set(data.index.unique())
        for sim in selected_simulators:
            if sim not in available:
                # nothing was measured for this simulator on this config
                continue

            name = sim
            if sim in SIM_NAME:
                name = SIM_NAME[sim]

            sim_data = data.loc[[sim]]
            all_values.append(sim_data["Value"])

            # one legend entry per simulator, taken from the first column in
            # which the simulator appears
            first_occurrence = sim not in sims_in_legend
            sims_in_legend.add(sim)

            bars.append(go.Bar(
                x=sim_data["Benchmark"],
                y=sim_data["Value"],
                text=sim_data["ValueStr"],
                textposition='auto',
                textangle=textangle,
                textfont=dict(
                    size=bar_fontsize*scale,
                    color="black",
                ),
                hovertemplate=(
                    "<b>%{x}</b><br>" +
                    "%{y:.2f}<br>"
                ),
                xaxis="x" if ci <= 1 else f"x{ci}",
                name=str(name),
                showlegend=first_occurrence,
                marker=sim_marker(sim),
            ))

    if not all_values:
        raise ValueError("no data to plot for the selected configs and simulators")

    y_max = pd.concat(all_values).max()
    # NOTE(review): the power-of-ten ticks are applied even when log is False,
    # as before - confirm that this is intended for linear axes.
    y_ticks = powers_of(10, including=y_max)
    y_ticktext = [human_format(t, round_to=0) for t in y_ticks]

    # only the layout (axis geometry and title annotations) of the subplot
    # grid is used; the traces are attached to the axes by name above
    layout = make_subplots(rows=1, cols=len(selected_configs),
                  subplot_titles=subplot_titles,
                  shared_yaxes=True, horizontal_spacing=0.05).layout
    for ci in range(1, len(selected_configs)+1):
        xaxis = "xaxis" if ci <= 1 else f"xaxis{ci}"
        yaxis = "yaxis" if ci <= 1 else f"yaxis{ci}"
        if yaxis in layout:
            if log:
                layout[yaxis].type = "log"
            layout[yaxis].title = ylabel
            layout[yaxis].gridcolor = "gray"
            layout[yaxis].zerolinecolor = "gray"
            layout[yaxis].tickmode = "array"
            layout[yaxis].tickvals = y_ticks
            layout[yaxis].ticktext = y_ticktext
        if xaxis in layout:
            layout[xaxis].tickfont = go.layout.xaxis.Tickfont(size = tick_fontsize*scale)
            layout[xaxis].dividerwidth = 0
            layout[xaxis].dividercolor = "white"

    fig = go.Figure(data=bars, layout=layout)
    fig.update_layout(**DEFAULT_LAYOUT_OPTIONS)
    fig.update_layout(
        font=go.layout.Font(
            family = font_family,
            color = "black",
            size = fontsize*scale,
        ),
        hoverlabel = dict(
            bgcolor = "white",
            font_size = fontsize*scale,
            font_family = font_family,
        ),
        barmode = barmode,
        bargroupgap = bargroupgap,
        bargap = bargap,
        legend = go.layout.Legend(
            font=go.layout.legend.Font(size = legend_fontsize*scale),
            itemsizing="constant",
        ),
        plot_bgcolor = "white",
    )
    if title is not None:
        fig.update_layout(
            title = dict(
                text=title,
                font=go.layout.title.Font(size = title_fontsize*scale),
                x=0.5,
                y=0.98,
                xanchor="center",
                yanchor="top",
            )
        )
        fig.update_layout(margin_t=2*MARGIN)

    # the subplot titles are layout annotations
    for anno in fig.layout.annotations:
        anno.font = go.layout.annotation.Font(size = legend_fontsize*scale)

    # ACM sigconf: paperwidth=8.5in, paperheight=11in
    width_inches = 8
    height_inches = 1.21

    fig.update_layout(
        width = width_inches * PPI * scale,
        height = height_inches * PPI * scale,
    )
    return fig

In [10]:
def compute_rel_err(coll_metric_df, sim_name, reference_sim=None):
    """Compare a simulator's metric values against the reference (hardware) values.

    Values are first summed per (Config, Benchmark) for each of the two
    simulators, so frames that carry several rows per benchmark (for example
    separate "Trace" and "Sim" rows) are compared total against total instead
    of being cross-joined row by row. The two sides are then inner-joined on
    (Config, Benchmark) and pairs whose simulator value is not positive are
    dropped.

    Args:
        coll_metric_df: DataFrame with "Config", "Simulator", "Benchmark" and
            "Value" columns.
        sim_name: key of the simulator to evaluate.
        reference_sim: key of the simulator that provides the reference
            values. Defaults to gpusims.NATIVE.

    Returns:
        dict with ``mean_rel_err``, ``max_rel_err``, ``min_rel_err`` and
        ``std_rel_err`` of ``|sim - ref| / ref`` and the Pearson correlation
        ``corr``. ``corr`` is NaN when fewer than two pairs remain or one side
        is constant; all entries are NaN when the two sides share no
        (Config, Benchmark) pair.
    """
    if reference_sim is None:
        reference_sim = gpusims.NATIVE

    def summed_values(simulator, value_column):
        # One value per (Config, Benchmark). min_count=1 keeps groups that hold
        # only NaN as NaN instead of turning them into 0.
        rows = coll_metric_df[coll_metric_df["Simulator"] == simulator]
        summed = rows.groupby(["Config", "Benchmark"], sort=False)["Value"].sum(min_count=1)
        return summed.reset_index().rename(columns={"Value": value_column})

    joined = summed_values(sim_name, "SimValue").merge(
        summed_values(reference_sim, "HwValue"),
        how="inner",
        on=["Config", "Benchmark"],
    )

    correl_co = np.nan
    if len(joined) < 1:
        return dict(
            mean_rel_err=np.nan,
            max_rel_err=np.nan,
            min_rel_err=np.nan,
            std_rel_err=np.nan,
            corr=correl_co,
        )

    # only compare benchmarks for which the simulator reported a positive value
    valid = joined["SimValue"] > 0.0
    sim_values = joined.loc[valid, "SimValue"]
    hw_values = joined.loc[valid, "HwValue"]
    errs = sim_values - hw_values
    # avoid divide by zero (new Series, the joined frame is left untouched)
    hw_values = hw_values + 0.0000001

    try:
        rel_errs = errs.abs() / hw_values

        hw_array = hw_values.to_numpy(dtype=float)
        sim_array = sim_values.to_numpy(dtype=float)
        # avoid divide by zero if hw or sim values have zero std dev
        if len(sim_array) > 1 and np.std(hw_array) != 0.0 and np.std(sim_array) != 0.0:
            correl_co = np.corrcoef(hw_array, sim_array)[0][1]
        return dict(
            mean_rel_err=rel_errs.mean(),
            max_rel_err=rel_errs.max(),
            min_rel_err=rel_errs.min(),
            std_rel_err=rel_errs.std(),
            corr=correl_co,
        )
    except Exception:
        # show the offending inputs, then re-raise with the original traceback
        print(hw_values)
        print(sim_values)
        raise

In [11]:
def min_max_err_table(err_data):
    """Print the rows of a LaTeX table with the min - max relative error in percent.

    The header row holds one column per benchmark; below it comes one row per
    (simulator, config) pair. Rows without a single value are printed commented
    out, and every row is followed by a "%" separator line.

    Reads the notebook-level names selected_simulators, plot_configs, configs,
    benchmarks, BENCH_ABBR and SIM_NAME_TEX.

    Args:
        err_data: list of dicts with "config", "sim", "bench", "min_rel_err"
            and "max_rel_err" entries.
    """
    plot_sims = [sim for sim in selected_simulators if sim != gpusims.NATIVE]
    table_benchmarks = ["vectoradd", "cuda4-matrixmul", "cuda6-transpose", "babelstream"]

    def error_range(sim, conf, bench_key):
        # "<min> - <max>\%" for the single matching entry, "" otherwise
        bench = benchmarks[bench_key]
        found = [
            entry for entry in err_data
            if entry["config"] == conf and entry["sim"] == sim and entry["bench"] == bench.name
        ]
        if len(found) != 1 or not bench.enabled(sim):
            return ""
        lowest, highest = found[0]["min_rel_err"], found[0]["max_rel_err"]
        if np.isnan(lowest) or np.isnan(highest):
            return ""
        return "%.1f" % (lowest*100) + r" - " + "%.1f" % (highest*100) + r"\%"

    header = [""] + [r"\textsc{%s}" % BENCH_ABBR[key] for key in table_benchmarks]
    print(" & ".join(header) + r" \\ \hline")
    print("%")

    for sim in plot_sims:
        for conf in plot_configs:
            label = SIM_NAME_TEX[sim] + " " + configs[conf].name
            cells = [error_range(sim, conf, key) for key in table_benchmarks]
            row = " & ".join([label] + cells) + r" \\"
            if not any(cells):
                # comment out empty lines
                row = "% " + row
            print(row)
            # separator
            print("%")

# min_max_err_table(metric_table_data)
# pprint(metric_table_data)

In [12]:
# => per config, benchmark and input, plot bars for each simulator

# Metric class -> plot function, in plotting order. Execution time has its own
# stacked trace/sim chart; every other metric uses the generic grouped bars.
metrics = {gpusims.plot.metrics.ExecutionTime: plot_bars_exec_time}
metrics.update(dict.fromkeys(
    [
        gpusims.plot.metrics.Cycles,
        gpusims.plot.metrics.IPC,
        gpusims.plot.metrics.InstructionCount,
        gpusims.plot.metrics.L2Reads,
        gpusims.plot.metrics.L2Writes,
        gpusims.plot.metrics.L2Accesses,
        gpusims.plot.metrics.DRAMReads,
        gpusims.plot.metrics.DRAMWrites,
        gpusims.plot.metrics.DRAMAccesses,
        gpusims.plot.metrics.L2ReadHit,
        gpusims.plot.metrics.L2WriteHit,
    ],
    plot_bars,
))

# Disabled shortcut: flip to True to plot the execution time only.
if False:
    metrics = {gpusims.plot.metrics.ExecutionTime: plot_bars_exec_time}

for metric_cls, metrics_plot_func in metrics.items():
    # print(metric_cls.name)
    ylabel = metric_cls.name
    if metric_cls.unit is not None:
        ylabel += f" [{metric_cls.unit}]"

    metric_table_data = []
    all_benches_metric_df = []
    for bench_key, selected_bench_inputs in selected_benchmarks:
        bench = benchmarks[bench_key]
        
        per_config_subplots = []
        per_config_metric_dfs = {}
        for config_key in selected_configs:
            config = configs[config_key]
            
            print(metric_cls.name, config_key, bench_key)
            all_metric_df = []
            
            # for inp in bench.inputs:
            supported_simulators = [
                sim_name for sim_name in selected_simulators
                if bench.enabled(sim_name) # and inp.enabled(sim_name)
            ]
            for inp_args, inp_abbr in selected_bench_inputs:
                inp = next(i for i in bench.inputs if i.args.strip() == inp_args.strip())
                assert inp is not None, f"input {inp_args} does not exist"
                
                plot_data = PlotData(benchmark=bench, config=config, inp=inp)
                #supported_simulators = [
                #    sim_name for sim_name in selected_simulators
                #    if bench.enabled(sim_name) and inp.enabled(sim_name)
                #]
                for sim_name in supported_simulators:
                    sim = simulators[sim_name]
                    
                    #if not bench.enabled(sim_name):
                    #    continue
                    if not inp.enabled(sim_name):
                        continue
                    
                    # print(sim_name, config.name, bench.name, inp.args)
                    bench_config = sim(
                        run_dir=run_dir / sim_name.lower(),
                        benchmark=bench,
                        config=config,
                    )
                    if not bench_config.input_path(inp).is_dir():
                        print(f"WARN: {bench_config.input_path(inp)} does not exist")
                        continue

                    plot_data[sim_name] = bench_config.load_dataframe(inp)

                metric = metric_cls(plot_data)
                num_blocks = int(metric.num_blocks())
                metric_df = metric.compute()
                # metric_df["Benchmark"] = f"{bench.name}<br>{inp.args}<br>({num_blocks} blocks)"
                # metric_df["Benchmark"] = f"{BENCH_ABBR[bench_key]}<br>{inp_abbr}<br>({num_blocks} blocks)"
                # metric_df["Benchmark"] = f"{BENCH_ABBR[bench_key]} {inp_abbr}<br>({num_blocks} blocks)"
                # metric_df["Benchmark"] = f"{inp_abbr}<br>({num_blocks} blocks)"
                metric_df["Benchmark"] = f"{inp_abbr} ({num_blocks} blocks)"
                metric_df["Config"] = config.name
                
                for sim_name in supported_simulators:
                    err_metrics = compute_rel_err(metric_df, sim_name=sim_name)
                    #err_metric_df = metric_df
                    # print(err_metric_df)
                    #hw_value = err_metric_df[err_metric_df["Simulator"] == gpusims.NATIVE]["Value"]
                    #hw_value = hw_value.reset_index(drop=True)
                    # print(hw_value)
                    #sim_value = err_metric_df[err_metric_df["Simulator"] == sim_name]["Value"]
                    #sim_value = sim_value.reset_index(drop=True)
                    #if len(sim_value) < 1:
                    #    rel_err = np.nan
                    #else:
                    #    # print(sim_value.shape)
                    #    err = sim_value - hw_value
                    #    valid = sim_value > 0.0
                    #    # print(valid.shape)
                    #    # print(err.shape)
                    #    err = err[valid]
                    #    hw_value = hw_value[valid]
                    #    # print("err", hw_value)
                    #    rel_err = err.abs() / (hw_value+0.0000001)
                    #    rel_err = rel_err.mean()
                    
                    log = f"{config_key} {metric_cls.name} {bench.name} {inp.args} {sim_name} rel. err: {err_metrics['mean_rel_err']:.5f} corr: {err_metrics['corr']:.5f}\n"
                    # print(log)
                    stat_log += log
                    
                all_metric_df.append(metric_df)
                all_benches_metric_df.append(metric_df)
                # continue

                # print(metric_df)
                if False:
                    # skip individual bars
                    fig = metrics_plot_func(
                        selected_simulators=supported_simulators,
                        data=metric_df,
                        config=config,
                        ylabel=ylabel,
                        title=f"{metric_cls.name} for {bench.name} {inp.args} ({config.name})",
                        log=metric_cls.log,
                    )
                    filename = ["bar", metric.name, bench.name, config.key, inp.sanitized_name()]
                    filename = Path("./figs") / gpusims.utils.slugify("_".join(filename))
                    filename = filename.with_suffix(".pdf")
                    filename.parent.mkdir(parents=True, exist_ok=True)
                    fig.write_image(filename, **PDF_OPTS)
                    # print("wrote", filename)

            all_metric_df = pd.concat(all_metric_df)
            
            # Error statistics for all inputs combined, per simulator, for this config.
            # compute_rel_err is called on the concatenated frame once per simulator;
            # its result is stored as one row of metric_table_data and summarised as
            # one line appended to stat_log.
            for sim_name in supported_simulators:
                err_metrics = compute_rel_err(all_metric_df, sim_name=sim_name)
                metric_table_data.append(dict(
                    config=config_key,
                    bench=bench.name,
                    sim=sim_name,
                    # every key of err_metrics becomes a field of the row; the log line
                    # below reads 'mean_rel_err' and 'corr' from the same mapping
                    **err_metrics
                ))
                log = f"{config_key} {metric_cls.name} {bench.name} {sim_name} rel. err: {err_metrics['mean_rel_err']:.5f} corr: {err_metrics['corr']:.5f}\n"
                # collected in memory; stat_log is written to disk at the end of the cell
                stat_log += log

            # One figure per (metric, benchmark, config) built from the frame that
            # combines all inputs.
            fig = metrics_plot_func(
                selected_simulators=supported_simulators,
                data=all_metric_df,
                config=config,
                ylabel=ylabel,
                title=f"{metric_cls.name} for {bench.name} ({config.name})",
                log=metric_cls.log,
            )
            # File name: the parts joined with "_" and passed through
            # gpusims.utils.slugify, placed under ./figs with a .pdf suffix.
            filename = ["all_inputs_bar", metric_cls.name, bench.name, config.key]
            filename = Path("./figs") / gpusims.utils.slugify("_".join(filename))
            filename = filename.with_suffix(".pdf")
            # make sure the output directory exists before writing
            filename.parent.mkdir(parents=True, exist_ok=True)
            fig.write_image(filename, **PDF_OPTS)
            # keep the combined frame under its config key; the all-configs statistics
            # and plot_subplots below read per_config_metric_dfs
            per_config_metric_dfs[config_key] = all_metric_df
            # break
        
        # Error statistics per simulator over the frames of all configs collected in
        # per_config_metric_dfs; only a log line is produced here (no table row).
        for sim_name in supported_simulators:
            # NOTE(review): this concatenation does not depend on sim_name, so the same
            # frame is rebuilt on every iteration; it could be hoisted above the loop.
            err_metric_df = pd.concat(per_config_metric_dfs.values())
            err_metrics = compute_rel_err(err_metric_df, sim_name=sim_name)
            log = f"all-configs {metric_cls.name} {bench.name} {sim_name} rel. err: {err_metrics['mean_rel_err']:.5f} corr: {err_metrics['corr']:.5f}\n"
            # collected in memory; stat_log is written to disk at the end of the cell
            stat_log += log
        
        if metric_cls is gpusims.plot.metrics.ExecutionTime:
            # execution time does not work with plot_subplots, so the rest of this
            # iteration (the all-configs figure) is skipped for that metric
            continue

        # One figure per (metric, benchmark) with the per-config frames passed
        # together to plot_subplots.
        fig = plot_subplots(
            selected_simulators=supported_simulators,
            selected_configs=plot_configs,
            per_config_data=per_config_metric_dfs,
            ylabel=ylabel,
            # no title is passed on purpose: captions are used instead to save space
            log=metric_cls.log,
        )
        # File name: the parts joined with "_" and passed through
        # gpusims.utils.slugify, placed under ./figs with a .pdf suffix.
        filename = ["all_configs_bar", metric_cls.name, bench.name]
        filename = Path("./figs") / gpusims.utils.slugify("_".join(filename))
        filename = filename.with_suffix(".pdf")
        # make sure the output directory exists before writing
        filename.parent.mkdir(parents=True, exist_ok=True)
        fig.write_image(filename, **PDF_OPTS)
        
    # min_max_err_table is only called for these four metrics; it receives the rows
    # accumulated in metric_table_data.
    if metric_cls in [
        gpusims.plot.metrics.InstructionCount, gpusims.plot.metrics.L2Accesses,
        gpusims.plot.metrics.Cycles, gpusims.plot.metrics.DRAMAccesses,
    ]:
        min_max_err_table(metric_table_data)

    # Slowdown figures are only produced for the execution time metric.
    if metric_cls is gpusims.plot.metrics.ExecutionTime:
        # merge the frames gathered for every benchmark into a single frame
        all_benches_metric_df = pd.concat(all_benches_metric_df)
        # the native (hardware) run is the baseline and is not plotted as a simulator
        simulated_only = tuple(filter(lambda s: s != gpusims.NATIVE, selected_simulators))
        # Same export steps for both figures, in the original order:
        # per-config slowdown first, overall mean slowdown second.
        for slowdown_plot, pdf_name in (
            (plot_mean_slowdown_per_config, "mean_slowdown_per_config.pdf"),
            (plot_mean_slowdown, "mean_slowdown.pdf"),
        ):
            # each call receives its own fresh list of simulators
            _, fig = slowdown_plot(
                selected_simulators=list(simulated_only),
                df=all_benches_metric_df,
            )
            filename = Path("./figs") / pdf_name
            filename.parent.mkdir(parents=True, exist_ok=True)
            fig.write_image(filename, **PDF_OPTS)

# Persist the statistics log accumulated in stat_log. The output directory is
# created first (as is done for every figure export) so this write does not depend
# on a figure having been saved to ./figs beforehand.
stat_log_path = Path("./figs/stats.log")
stat_log_path.parent.mkdir(parents=True, exist_ok=True)
# explicit encoding keeps the file identical regardless of the platform default
with open(stat_log_path, "w", encoding="utf-8") as stat_log_file:
    stat_log_file.write(stat_log)
# per_config_metric_dfs

Execution Time sm6_gtx1080 babelstream
Execution Time sm86_a4000 babelstream
Execution Time sm86_rtx3070 babelstream
Execution Time sm6_gtx1080 vectoradd
Execution Time sm86_a4000 vectoradd
Execution Time sm86_rtx3070 vectoradd
Execution Time sm6_gtx1080 cuda4-matrixmul
Execution Time sm86_a4000 cuda4-matrixmul
Execution Time sm86_rtx3070 cuda4-matrixmul
Execution Time sm6_gtx1080 cuda10-matrixmul
Execution Time sm86_a4000 cuda10-matrixmul
Execution Time sm86_rtx3070 cuda10-matrixmul
Execution Time sm6_gtx1080 cuda6-transpose
Execution Time sm86_a4000 cuda6-transpose
Execution Time sm86_rtx3070 cuda6-transpose
Execution Time sm6_gtx1080 cuda10-transpose
Execution Time sm86_a4000 cuda10-transpose
Execution Time sm86_rtx3070 cuda10-transpose
Cycles sm6_gtx1080 babelstream
Cycles sm86_a4000 babelstream
Cycles sm86_rtx3070 babelstream
Cycles sm6_gtx1080 vectoradd
Cycles sm86_a4000 vectoradd
Cycles sm86_rtx3070 vectoradd
Cycles sm6_gtx1080 cuda4-matrixmul
Cycles sm86_a4000 cuda4-matrixmul
C

In [13]:
### debug dataframe
# Loads the statistics frame of a single (simulator, benchmark, config) run for its
# last configured input and displays the rows whose name matches a filter.
# Switch targets by swapping the commented alternatives below.
sim = gpusims.MULTI2SIM
# sim = gpusims.MACSIM
# sim = gpusims.TEJAS
# sim = gpusims.ACCELSIM_SASS
# sim = gpusims.ACCELSIM_PTX
# sim = gpusims.NATIVE
# benchmark = benchmarks["babelstream"]
# benchmark = benchmarks["cuda4-matrixmul"]
benchmark = benchmarks["cuda6-transpose"]
# benchmark = benchmarks["vectoradd"]
config = configs["sm86_rtx3070"]
# config = configs["sm6_gtx1080"]
# config = configs["sm86_a4000"]

# the per-simulator run directory is the lower-cased simulator key under run_dir
bench_config = gpusims.SIMULATORS[sim](
    run_dir=run_dir / sim.lower(),
    benchmark=benchmark,
    config=config,
)
assert bench_config.benchmark.enabled(sim)
# inspect the last input of the benchmark
bench_input = bench_config.benchmark.inputs[-1]
print(sim, benchmark.name, bench_input.args)
df = bench_config.load_dataframe(bench_input)
df = df.round(3)
print(df.shape)

# Best-effort: print the mean launch dimensions when the frame has them.
# Only lookup/aggregation failures are tolerated, so interrupts and unrelated
# errors are no longer silently swallowed.
# NOTE(review): presumably only some backends report launch__* columns - confirm.
try:
    print(df["launch__grid_size"].mean())
    print(df["launch__block_size"].mean())
except (KeyError, TypeError):
    pass

# one row per statistic; transposed once and reused for the filter
stats_by_name = df.T

# Alternative filters (swap with the active expression at the end of the cell):
# df.T[~df.T.index.str.contains(r"^Config.|SM \d+", re.IGNORECASE)] # multi2sim
# df.T[~df.T.index.str.contains(r"_CORE_\d+$", re.IGNORECASE)] # macsim
# df.T[df.T.index.str.contains(r"ipc", re.IGNORECASE)] # filtering
# df.T[df.T.index.str.contains(pat=r"launch__grid_size|launch__block", flags=re.IGNORECASE)] # filtering
# df.T[df.T.index.str.contains(pat=r"read_lookup_hit|read_lookup_miss", flags=re.IGNORECASE)] # filtering
# df.T[df.T.index.str.contains(pat=r"occupancy|sm__warps_active.avg.pct_of_peak_sustained_active", flags=re.IGNORECASE)].mean(axis=1).to_frame() # filtering
# print(df["gpc__cycles_elapsed.avg_cycle"].sum())
# df["gpc__cycles_elapsed.avg_cycle"].values
# df.loc[df.columns[df.columns.str.contains(pat=r"cycles", flags=re.IGNORECASE)]] # filtering
# df.T[df.T.index.str.contains(r"launch__block_|launch__grid_", re.IGNORECASE)] # filtering
# df.T
# df

# active filter: must stay the last expression so the notebook renders it
stats_by_name[stats_by_name.index.str.contains(pat=r"kpl-l1.*Accesses", flags=re.IGNORECASE)] # filtering

m2s CUDA6-transpose -repeat=1 -dimX=128 -dimY=128
(1, 5751)


Unnamed: 0_level_0,Value
Stat,Unnamed: 1_level_1
kpl-l1-28.CoalescedAccesses,0
kpl-l1-1.RetriedAccesses,0
kpl-l1-37.Accesses,0
kpl-l1-26.Accesses,0
kpl-l1-21.RetriedAccesses,0
kpl-l1-26.CoalescedAccesses,0
kpl-l1-42.Accesses,0
kpl-l1-9.CoalescedAccesses,0
kpl-l1-37.RetriedAccesses,0
kpl-l1-27.Accesses,0
