In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
import pandas as pd
import numpy as np
from copy import copy
from pprint import pprint
from plot import SIM_ABBR, SIM_NAME, SIM_COLOR, DEFAULT_LAYOUT_OPTIONS, MARGIN, human_format

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import itertools
import abc
import sys
import re

sys.path.append("../")
import gpusims
import gpusims.plot.metrics as metric
from gpusims.plot.data import PlotData
from gpusims.config import Config, parse_configs
from gpusims.bench import parse_benchmarks

In [3]:
pd.set_option('display.max_rows', 700)
pd.set_option('display.max_columns', 700)
np.seterr(all='raise')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [4]:
root_dir = Path("/Users/roman/dev/gpgpusims")
benchmark_dir = root_dir / "benchmarks"
run_dir = root_dir / "run"
assert benchmark_dir.is_dir()
assert run_dir.is_dir()

In [5]:
simulators = copy(gpusims.SIMULATORS)
configs = parse_configs(benchmark_dir / "configs" / "configs.yml")
benchmarks = parse_benchmarks(benchmark_dir / "benchmarks.yml")

pprint(simulators)
pprint(configs)
pprint(benchmarks)

{'accelsim-ptx': <class 'gpusims.accelsim.AccelSimPTXBenchmarkConfig'>,
 'accelsim-sass': <class 'gpusims.accelsim_sass.AccelSimSASSBenchmarkConfig'>,
 'm2s': <class 'gpusims.multi2sim.Multi2SimBenchmarkConfig'>,
 'macsim': <class 'gpusims.macsim.MacSimBenchmarkConfig'>,
 'native': <class 'gpusims.native.NativeBenchmarkConfig'>,
 'tejas': <class 'gpusims.tejas.TejasBenchmarkConfig'>}
{'sm6_gtx1080': Config(key='sm6_gtx1080', name='GTX 1080', path=PosixPath('/Users/roman/dev/gpgpusims/benchmarks/configs/SM6_GTX1080'), spec={'sm_count': 20, 'clock_speed': 1607}),
 'sm86_a4000': Config(key='sm86_a4000', name='A4000', path=PosixPath('/Users/roman/dev/gpgpusims/benchmarks/configs/SM86_A4000'), spec={'sm_count': 48, 'clock_speed': 735}),
 'sm86_rtx3070': Config(key='sm86_rtx3070', name='RTX 3070', path=PosixPath('/Users/roman/dev/gpgpusims/benchmarks/configs/SM86_RTX3070'), spec={'sm_count': 46, 'clock_speed': 1132})}
{'babelstream': Benchmark(/Users/roman/dev/gpgpusims/benchmarks/BabelStrea

In [6]:
#selected_simulators = copy(gpusims.SIMULATORS)
#selected_configs = copy(configs)
#selected_benchmarks = copy(benchmarks)

# for testing
#if True:
    #testing_simulators = None
    #testing_configs = None
    #testing_benchmarks = None
    # testing_simulators = list(gpusims.SIMULATORS.keys())
    # testing_simulators = [gpusims.NATIVE]
    # testing_simulators = [gpusims.MULTI2SIM]
    # testing_configs = ["sm6_gtx1080", "sm86_a4000"]
    # testing_benchmarks = ["babelstream"] # "cuda6-transpose"]
    
    #if testing_simulators is not None:
    #    selected_simulators = {k: v for k, v in gpusims.SIMULATORS.items() if k in testing_simulators}
    #if testing_configs is not None:
    #    selected_configs = {k: v for k, v in configs.items() if k in testing_configs}
    #if testing_benchmarks is not None:
    #    selected_benchmarks = {k: v for k, v in benchmarks.items() if k in testing_benchmarks}

#pprint(selected_simulators)
#pprint(selected_configs)
#pprint(selected_benchmarks)

In [7]:
def plot_bars(
    selected_simulators, data, config,
    title=None, fontsize=14, font_family="Helvetica", round_to=1,
    ylabel=None, log=False,
):
    bars = []
    
    def sort_by_bench_size(key):
        # print(key)
        # print(bench)
        # print(data.iloc[key.index]["Value"])
        values = data.iloc[key.index]["Value"]
        return values
    
    #data = data.sort_values(
    #    by=["Simulator", "Benchmark"], key=sort_by_bench_size)
    
    data = data.set_index(["Simulator"])
    
    data["ValueStr"] = data["Value"].apply(lambda v: human_format(v, round_to=round_to))
    # simulators = data.index.get_level_values("Simulator").unique().tolist()
    # benchmarks = data["Benchmark"].unique().tolist()
    
    for i, sim in enumerate(selected_simulators):
        # add hardware GPU name
        name = sim
        if sim in SIM_NAME:
            name = SIM_NAME[sim]
        if sim == gpusims.NATIVE:
            name += f" ({config.name})"
        
        sim_data = data.loc[[sim]]
        
        bars.append(go.Bar(
            x = sim_data["Benchmark"],
            y = sim_data["Value"],
            text = sim_data["ValueStr"],
            textposition='auto',
            textangle=0,
            textfont = dict(
                size=fontsize - 2,
                color="black",
            ),
            hovertemplate = (
                "<b>%{x}</b><br>" +
                "%{y:.2f}<br>"
            ),
            name=str(name),
            marker=dict(
                color=SIM_COLOR[sim],
                line=dict(
                    color='rgba(50, 171, 96, 1.0)',
                    width=0
                ),
            ),
        ))

    layout = go.Layout(
        font_family=font_family,
        font_color="black",
        font_size=fontsize,
        title=dict(
            text=title,
            x=0.5,
            y=0.95,
            xanchor="center",
            yanchor="top",
        ),
        yaxis=go.layout.YAxis(
            title=ylabel,
            gridcolor="gray",
            zerolinecolor="gray",
        ),
        xaxis=go.layout.XAxis(
            tickfont=dict(
                size=0.8 * fontsize,
            ),
            dividerwidth=0,
            dividercolor="white",
        ),
        hoverlabel=dict(
            bgcolor="white",
            font_size=fontsize,
            font_family=font_family,
        ),
        barmode="group",
        bargroupgap=0.1,
        bargap=0.25,
        showlegend=True,
        **DEFAULT_LAYOUT_OPTIONS,
    )
    if log:
        layout.yaxis.type = "log"
    fig = go.Figure(data=bars, layout=layout)
    return fig

In [21]:
def plot_bars_exec_time(
    selected_simulators, data, config,
    title=None, fontsize=14, font_family="Helvetica", round_to=1,
    ylabel=None, log=False,
):
    bars = []
    def sort_by_bench_size(key):
        # print(key)
        # print(bench)
        # print(data.iloc[key.index]["Value"])
        values = data.iloc[key.index]["Value"]
        return values
    
    #data = data.sort_values(
    #    by=["Simulator", "Benchmark"], key=sort_by_bench_size)
    
    data = data.set_index(["Simulator", "Kind"])
    # data = data.set_index(["Simulator", "Kind"])
    # data = data.sort_values(by="Benchmark")
    # data = data.sort_index()
    # simulators = data.index.get_level_values("Simulator").unique().tolist()
    # data.index.get_level_values("Simulator")
    # simulators = [s for s in simulators if ["Value"]]
    # benchmarks = trace_time["Benchmark"].values
    
    all_benchmarks = None
    for i, sim in enumerate(selected_simulators):
        # add hardware GPU name
        name = SIM_NAME[sim]
        if sim == gpusims.NATIVE:
            name += f" ({config.name})"
        
        # trace and sim time
        trace_time = data.loc[data.index == (sim, "Trace")]
        sim_time = data.loc[data.index == (sim, "Sim")]
        if sim != gpusims.NATIVE:
            trace_time = trace_time.round(round_to)
            sim_time = sim_time.round(round_to)    
        
        # total time (trace and sim)
        total_time = trace_time["Value"].values +  sim_time["Value"].values
        if sim != gpusims.NATIVE:
            total_time = total_time.round(round_to)
        
        if total_time.sum() == 0:
            # skip simulator
            continue
        
        benchmarks = trace_time["Benchmark"].values
        if all_benchmarks is None:
            all_benchmarks = benchmarks
            
        # need same number of benches for each simulator
        assert (benchmarks == all_benchmarks).all()
        
        x = [benchmarks, [SIM_ABBR[sim]] * len(benchmarks)]
        # flat_x = [item for sublist in x for item in sublist]
        # y = [10] * len(flat_x)
        y = trace_time["Value"]
        # if sim != "Hardware":
        #     y = y.round(round_to)
        
        # data.loc[data.index == (sim, "Trace")]
        
        bars.append(go.Bar(
            x = x,
            y = y,
            text = y.apply(lambda v: f"trace<br>{v}"),
            textposition='auto',
            textangle=0,
            showlegend=False,
            textfont = dict(
                size=fontsize - 2,
                color="black",
            ),
            hovertemplate = (
                "<b>%{x}</b><br>" +
                "%{y:.2f}<br>"
            ),
            name=str(name),
            marker=dict(
                color=SIM_COLOR[sim],
                line=dict(
                    color="white",
                    # color="rgba(0,0,0,0.5)",
                    width=2,
                ),
            ),
        ))
        
        # continue
        # if sim != "Hardware":
        #     total_time = total_time.round(round_to)
        # print(total_time)
        bars.append(go.Bar(
            x = x,
            y = sim_time["Value"], # will be stacked
            text = total_time,
            textposition='auto',
            textangle=0,
            textfont = dict(
                size=fontsize - 2,
                color="black",
            ),
            hovertemplate = (
                "<b>%{x}</b><br>" +
                "%{y:.2f}<br>"
            ),
            name=str(name),
            marker=dict(
                color=SIM_COLOR[sim],
                line=dict(
                    # color=SIM_COLOR[sim],
                    # color="rgba(0,0,0,0.2)",
                    color="white",
                    width=2,
                ),
            ),
        ))
    
    # add empty separator
    x = [all_benchmarks, [""] * len(all_benchmarks)]
    y = [0] * len([item for sublist in x for item in sublist])
    bars.append(go.Bar(
        x = x,
        y = y,
        showlegend=False,
    ))
    
    margin = 50
    layout = go.Layout(
        font_family=font_family,
        font_color="black",
        font_size=fontsize,
        title=dict(
            text=title,
            x=0.5,
            y=0.95,
            xanchor="center",
            yanchor="top",
        ),
        yaxis=go.layout.YAxis(
            title=ylabel,
            gridcolor="gray",
            zerolinecolor="gray",
            # range=[min_axis_val * 0.9 ,max_axis_val*1.1]
        ),
        xaxis=go.layout.XAxis(
            # title="Benchmark",
            tickfont=dict(
                size=0.8 * fontsize,
            ),
            dividerwidth=0,
            dividercolor="white",
        ),
        hoverlabel=dict(
            bgcolor="white",
            font_size=fontsize,
            font_family=font_family,
        ),
        barmode="stack",
        # bargroupgap=0.1,
        bargap=0.1,
        showlegend=True,
        **DEFAULT_LAYOUT_OPTIONS,
    )
    if log:
        layout.yaxis.type = "log"
    return go.Figure(data=bars, layout=layout)

In [22]:
def plot_mean_slowdown_per_config(selected_simulators, df, config_order=None):
    slowdown_df = df
    # return slowdown_df, None
    # slowdown_df = slowdown_df.drop(columns=["Kind"])
    slowdown_df = slowdown_df[["Simulator", "Benchmark", "Config", "Value"]] # drop Kind
    slowdown_df = slowdown_df.groupby([
        "Simulator", "Benchmark", "Config"]).sum().reset_index()
    slowdown_df = slowdown_df[slowdown_df["Value"] > 0]
    # return slowdown_df, None

    hw = slowdown_df[slowdown_df["Simulator"] == gpusims.NATIVE]
    hw = hw[["Benchmark", "Config", "Value"]]
    hw = hw.rename(columns={"Value": "HardwareValue"})

    final_slowdown_df = []

    # slowdown_df = slowdown_df[slowdown_df["Value"] > 0]
    # print(slowdown_df.shape)
    # [["Simulator", "Value"]]
    # slowdown_df_group = slowdown_df.groupby("Simulator")
    for sim, sim_df in slowdown_df.groupby("Simulator"):
        if sim == gpusims.NATIVE:
            continue

        # print(sim_df.shape)
        # sim_df = sim_df[["Benchmark", "Config", "Value"]]
        # return sim_df, None
        sim_df = sim_df.rename(columns={"Value": "SimValue"})
        sim_df = sim_df.merge(hw, how="inner", on=["Benchmark", "Config"])
        sim_df["Value"] = sim_df["SimValue"] / sim_df["HardwareValue"]
        sim_df = sim_df[["Simulator", "Config", "Value"]]
        sim_df = sim_df.groupby(["Simulator", "Config"]).mean()
        sim_df = sim_df.reset_index()
        final_slowdown_df.append(sim_df)
        
    
    final_slowdown_df = pd.concat(final_slowdown_df)
    if config_order is not None:
        final_slowdown_df["ConfigIdx"] = final_slowdown_df["Config"].apply(lambda c: config_order.index(c))
        final_slowdown_df = final_slowdown_df.sort_values(by=["ConfigIdx"])
    final_slowdown_df = final_slowdown_df.rename(columns={"Config": "Benchmark"})
    # return final_slowdown_df, None
    return final_slowdown_df, plot_bars(
        selected_simulators=selected_simulators,
        data=final_slowdown_df,
        config=config,
        ylabel="Slowdown factor",
        title=f"Mean simulation slowdown",
        log=True,
    )

def plot_mean_slowdown(selected_simulators, df, config_order=None):
    slowdown_df = df
    slowdown_df = slowdown_df.drop(columns=["Kind", "Config"])
    slowdown_df = slowdown_df.groupby([
        "Simulator", "Benchmark"]).sum().reset_index()
    slowdown_df = slowdown_df[slowdown_df["Value"] > 0]
    # return slowdown_df, None

    hw = slowdown_df[slowdown_df["Simulator"] == gpusims.NATIVE]
    hw = hw[["Benchmark", "Value"]]
    hw = hw.rename(columns={"Value": "HardwareValue"})

    final_slowdown_df = []

    for sim, sim_df in slowdown_df.groupby("Simulator"):
        if sim == gpusims.NATIVE:
            continue

        # print(sim_df.shape)
        sim_df = sim_df.rename(columns={"Value": "SimValue"})
        sim_df = sim_df.merge(hw, how="inner", on=["Benchmark"])
        sim_df["Value"] = sim_df["SimValue"] / sim_df["HardwareValue"]
        sim_df = sim_df[["Simulator", "Value"]]
        sim_df = sim_df.groupby(["Simulator"]).mean()
        sim_df = sim_df.reset_index()
        final_slowdown_df.append(sim_df)
        
    
    final_slowdown_df = pd.concat(final_slowdown_df)
    final_slowdown_df["Benchmark"] = ""
    return final_slowdown_df, plot_bars(
        selected_simulators=selected_simulators,
        data=final_slowdown_df,
        config=config,
        ylabel="Slowdown factor",
        title=f"Mean simulation slowdown",
        log=True,
    )

if False:
    final_slowdown_df, fig = plot_mean_slowdown(
        selected_simulators=[s for s in selected_simulators if s != gpusims.NATIVE],
        df=all_benches_metric_df,
        config_order=[configs[c].name for c in selected_configs],
    )
    fig.show()
# final_slowdown_df

In [23]:
def plot_subplots(
    selected_simulators, selected_configs, per_config_data,
    title=None, fontsize=20, bar_fontsize=16, legend_fontsize=15, tick_fontsize=13,
    font_family="Helvetica", round_to=1,
    bargroupgap=0.1, bargap=0.1,
    ylabel=None, log=False,
):
    bars = []
    subplot_titles = []
    for ci, config_key in enumerate(selected_configs):
        ci += 1
        print(config_key)
        if config_key not in per_config_data:
            continue
            
        config = configs[config_key]
        subplot_titles.append(config.name)
        data = per_config_data[config_key]
        data = data.set_index(["Simulator"])
    
        data["ValueStr"] = data["Value"].apply(lambda v: human_format(v, round_to=round_to))

        for si, sim in enumerate(selected_simulators):
            name = sim
            if sim in SIM_NAME:
                name = SIM_NAME[sim]

            sim_data = data.loc[[sim]]

            bars.append(go.Bar(
                x = sim_data["Benchmark"],
                y = sim_data["Value"],
                text = sim_data["ValueStr"],
                textposition='auto',
                textangle=0,
                textfont = dict(
                    size=bar_fontsize,
                    color="black",
                ),
                hovertemplate = (
                    "<b>%{x}</b><br>" +
                    "%{y:.2f}<br>"
                ),
                xaxis = "x" if ci <= 1 else f"x{ci}",
                name=str(name),
                showlegend=ci==1,
                marker=dict(
                    color=SIM_COLOR[sim],
                    line=dict(
                        color='rgba(50, 171, 96, 1.0)',
                        width=0,
                    ),
                ),
            ))

    layout = make_subplots(rows=1, cols=len(selected_configs),
                  subplot_titles=subplot_titles,
                  shared_yaxes=True, horizontal_spacing=0.05).layout
    for ci in range(1, len(selected_configs)+1):
        xaxis = "xaxis" if ci <= 1 else f"xaxis{ci}"
        yaxis = "yaxis" if ci <= 1 else f"yaxis{ci}"
        if yaxis in layout:
            if log:
                layout[yaxis].type = "log"
            layout[yaxis].title = ylabel
            layout[yaxis].gridcolor = "gray"
            layout[yaxis].zerolinecolor = "gray"
        if xaxis in layout:
            layout[xaxis].tickfont = go.layout.xaxis.Tickfont(size = tick_fontsize)
            layout[xaxis].dividerwidth = 0
            layout[xaxis].dividercolor = "white"
    # print(layout)
    
    layout.font=go.layout.Font(
        family = font_family,
        color = "black",
        size = fontsize,
    )
    layout.hoverlabel = dict(
        bgcolor = "white",
        font_size = fontsize,
        font_family = font_family,
    )
    layout.barmode = "group"
    layout.bargroupgap = bargroupgap
    layout.bargap = bargap
    layout.legend = go.layout.Legend(
        font=go.layout.legend.Font(size = legend_fontsize),
    )
    layout.title = dict(
        text=title,
        font=go.layout.title.Font(size = fontsize),
        x=0.5,
        y=0.95,
        xanchor="center",
        yanchor="top",
    )
    layout.plot_bgcolor = "white"
    layout.margin = go.layout.Margin(
        pad=10,
        autoexpand=True,
        l=MARGIN, r=MARGIN, t=1.5*MARGIN, b=MARGIN
    )
    layout.width = 2000
    layout.height = 550
    
    fig = go.Figure(data=bars, layout=layout)
    return fig

In [24]:
# => per config, benchmark and input, plot bars for each simulator

metrics = {
    gpusims.plot.metrics.ExecutionTime: plot_bars_exec_time,
    gpusims.plot.metrics.Cycles: plot_bars,
    gpusims.plot.metrics.IPC: plot_bars,
    gpusims.plot.metrics.InstructionCount: plot_bars,
    gpusims.plot.metrics.L2Reads: plot_bars,
    gpusims.plot.metrics.L2Writes: plot_bars,
    gpusims.plot.metrics.L2Accesses: plot_bars,
    gpusims.plot.metrics.DRAMReads: plot_bars,
    gpusims.plot.metrics.DRAMWrites: plot_bars,
    gpusims.plot.metrics.L2ReadHit: plot_bars,
    gpusims.plot.metrics.L2WriteHit: plot_bars,
}
if False:
    metrics = {
        # gpusims.plot.metrics.L2Writes: plot_bars,
        # gpusims.plot.metrics.ExecutionTime: plot_bars_exec_time,
        gpusims.plot.metrics.Cycles: plot_bars,
        # gpusims.plot.metrics.IPC: plot_bars,
        # gpusims.plot.metrics.InstructionCount: plot_bars,
    }

# define ordering that makes sense (e.g. hw and accel close)
selected_simulators = [
    gpusims.TEJAS, gpusims.MACSIM, gpusims.MULTI2SIM,
    gpusims.ACCELSIM_PTX, gpusims.ACCELSIM_SASS, gpusims.NATIVE]

# define ordering of inputs that makes sense
selected_benchmarks = [
    ("babelstream", [
        "--arraysize 1024 --numtimes 1",
        "--arraysize 10240 --numtimes 1",
        "--arraysize 102400 --numtimes 1",
        # "--arraysize 1024 --numtimes 2",
    ]),
    ("vectoradd", [inp.args for inp in benchmarks["vectoradd"].inputs]),
    ("cuda4-matrixmul", [inp.args for inp in benchmarks["cuda4-matrixmul"].inputs]),
    ("cuda10-matrixmul", [
        "-wA=32 -hA=32 -wB=32 -hB=32",
        "-wA=128 -hA=128 -wB=128 -hB=128",
        "-wA=512 -hA=512 -wB=512 -hB=512",
        # "-wA=32 -hA=64 -wB=64 -hB=32",
    ]),
    ("cuda6-transpose", [
        "-repeat=1 -dimX=32 -dimY=32",
        "-repeat=1 -dimX=64 -dimY=64",
        "-repeat=1 -dimX=128 -dimY=128",
        # "-repeat=3 -dimX=32 -dimY=32",
    ]),
    ("cuda10-transpose", [
        "-repeat=1 -dimX=32 -dimY=32",
        "-repeat=1 -dimX=64 -dimY=64",
        "-repeat=1 -dimX=128 -dimY=128",
        # "-repeat=3 -dimX=32 -dimY=32",
    ]),
]

# define ordering that makes sense
selected_configs = ["sm6_gtx1080", "sm86_a4000", "sm86_rtx3070"]
plot_configs = ["sm6_gtx1080", "sm86_rtx3070"]

for metric_cls, metrics_plot_func in metrics.items():
    print(metric_cls.name)
    ylabel = metric_cls.name
    if metric_cls.unit is not None:
        ylabel += f" [{metric_cls.unit}]"

    all_benches_metric_df = []
    for bench_name, selected_bench_inputs in selected_benchmarks:
        bench = benchmarks[bench_name]
        
        per_config_subplots = []
        per_config_metric_dfs = {}
        for config_key in selected_configs:
            config = configs[config_key]
            
            # print(config_name, bench_name)
            all_metric_df = []
            
            # for inp in bench.inputs:
            supported_simulators = [
                sim_name for sim_name in selected_simulators
                if bench.enabled(sim_name) # and inp.enabled(sim_name)
            ]
            for inp_args in selected_bench_inputs:
                inp = next(i for i in bench.inputs if i.args.strip() == inp_args.strip())
                assert inp is not None, f"input {inp_args} does not exist"
                
                plot_data = PlotData(benchmark=bench, config=config, inp=inp)
                #supported_simulators = [
                #    sim_name for sim_name in selected_simulators
                #    if bench.enabled(sim_name) and inp.enabled(sim_name)
                #]
                for sim_name in supported_simulators:
                    sim = simulators[sim_name]
                    
                    #if not bench.enabled(sim_name):
                    #    continue
                    if not inp.enabled(sim_name):
                        continue
                    
                    # print(sim_name, config.name, bench.name, inp.args)
                    bench_config = sim(
                        run_dir=run_dir / sim_name.lower(),
                        benchmark=bench,
                        config=config,
                    )
                    if not bench_config.input_path(inp).is_dir():
                        print(f"WARN: {bench_config.input_path(inp)} does not exist")
                        continue

                    plot_data[sim_name] = bench_config.load_dataframe(inp)

                metric = metric_cls(plot_data)
                num_blocks = int(metric.num_blocks())
                metric_df = metric.compute()
                metric_df["Benchmark"] = f"{bench.name}<br>{inp.args}<br>({num_blocks} blocks)"
                metric_df["Config"] = config.name

                all_metric_df.append(metric_df)
                all_benches_metric_df.append(metric_df)
                # continue

                # print(metric_df)
                if False:
                    # skip individual bars
                    fig = metrics_plot_func(
                        selected_simulators=supported_simulators,
                        data=metric_df,
                        config=config,
                        ylabel=ylabel,
                        title=f"{metric_cls.name} for {bench.name} {inp.args} ({config.name})",
                        log=metric_cls.log,
                    )
                    filename = ["bar", metric.name, bench.name, config.key, inp.sanitized_name()]
                    filename = Path("./figs") / gpusims.utils.slugify("_".join(filename))
                    filename = filename.with_suffix(".pdf")
                    filename.parent.mkdir(parents=True, exist_ok=True)
                    fig.write_image(filename, format='pdf')
                    # print("wrote", filename)

            all_metric_df = pd.concat(all_metric_df)
            fig = metrics_plot_func(
                selected_simulators=supported_simulators,
                data=all_metric_df,
                config=config,
                ylabel=ylabel,
                title=f"{metric_cls.name} for {bench.name} ({config.name})",
                log=metric_cls.log,
            )
            filename = ["all_inputs_bar", metric_cls.name, bench.name, config.key]
            filename = Path("./figs") / gpusims.utils.slugify("_".join(filename))
            filename = filename.with_suffix(".pdf")
            filename.parent.mkdir(parents=True, exist_ok=True)
            fig.write_image(filename, format='pdf')
            # per_config_subplots.append(fig)
            per_config_metric_dfs[config_key] = all_metric_df
            # print("wrote", filename)
            # break
            
        fig = plot_subplots(
            selected_simulators=supported_simulators,
            selected_configs=plot_configs,
            per_config_data=per_config_metric_dfs,
            ylabel=ylabel,
            title=f"{metric_cls.name} for {bench.name}",
            log=metric_cls.log,
        )
        filename = ["all_configs_bar", metric_cls.name, bench.name]
        filename = Path("./figs") / gpusims.utils.slugify("_".join(filename))
        filename = filename.with_suffix(".pdf")
        filename.parent.mkdir(parents=True, exist_ok=True)
        fig.write_image(filename, format='pdf')

    if metric_cls is gpusims.plot.metrics.ExecutionTime:
        all_benches_metric_df = pd.concat(all_benches_metric_df)
        _, fig = plot_mean_slowdown_per_config(
            selected_simulators=[s for s in selected_simulators if s != gpusims.NATIVE],
            df=all_benches_metric_df,
            config_order=[configs[c].name for c in selected_configs],
        )
        filename = Path("./figs") / "mean_slowdown_per_config.pdf"
        filename.parent.mkdir(parents=True, exist_ok=True)
        fig.write_image(filename, format='pdf')
        _, fig = plot_mean_slowdown(
            selected_simulators=[s for s in selected_simulators if s != gpusims.NATIVE],
            df=all_benches_metric_df,
            config_order=[configs[c].name for c in selected_configs],
        )
        filename = Path("./figs") / "mean_slowdown.pdf"
        filename.parent.mkdir(parents=True, exist_ok=True)
        fig.write_image(filename, format='pdf')

# all_metric_df

Execution Time
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
Cycles
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
Total IPC
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
Total Instruction Count
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
Total L2 Reads
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
Total L2 Writes
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
sm6_gtx1080
sm86_rtx3070
Total L2 Acce

In [None]:
### debug dataframe
# sim = gpusims.MULTI2SIM
# sim = gpusims.MACSIM
# sim = gpusims.TEJAS
# sim = gpusims.ACCELSIM_SASS
# sim = gpusims.ACCELSIM_PTX
sim = gpusims.NATIVE
benchmark = benchmarks["babelstream"]
# benchmark = benchmarks["cuda4-matrixmul"]
# benchmark = benchmarks["cuda6-transpose"]
# benchmark = benchmarks["vectoradd"]
config=configs["sm86_rtx3070"]
# config=configs["sm6_gtx1080"]
# config=configs["sm86_a4000"]
bench_config = gpusims.SIMULATORS[sim](
    run_dir=run_dir / sim.lower(),
    benchmark=benchmark,
    config=config,
)
assert bench_config.benchmark.enabled(sim)
bench_input = bench_config.benchmark.inputs[2]
print(sim, benchmark.name, bench_input.args)
df = bench_config.load_dataframe(bench_input)
df = df.round(3)
# pprint(df.columns.tolist())
# print(df.index)
print(df.shape)
try:
    print(df["launch__grid_size"].mean())
    print(df["launch__block_size"].mean())
except:
    pass
# df.T[~df.T.index.str.contains(r"^Config.|SM \d+", re.IGNORECASE)] # multi2sim
# df.T[~df.T.index.str.contains(r"_CORE_\d+$", re.IGNORECASE)] # macsim
# df.T[df.T.index.str.contains(r"ipc", re.IGNORECASE)] # filtering
df.T[df.T.index.str.contains(pat=r"read_lookup_hit|read_lookup_miss", flags=re.IGNORECASE)] # filtering
# print(df["gpc__cycles_elapsed.avg_cycle"].sum())
# df["gpc__cycles_elapsed.avg_cycle"].values
# df.loc[df.columns[df.columns.str.contains(pat=r"cycles", flags=re.IGNORECASE)]] # filtering
# df.T[df.T.index.str.contains(r"launch__block_|launch__grid_", re.IGNORECASE)] # filtering
# df.T
# df
# .T