In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
import pandas as pd
import numpy as np
import itertools
from copy import copy
from pprint import pprint
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import abc
import sys
import re
sys.path.append("../")
import gpusims
import gpusims.plot.metrics as metric
from gpusims.plot.data import PlotData
from gpusims.config import Config, parse_configs
from gpusims.bench import parse_benchmarks

In [3]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
np.seterr(all='raise')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [5]:
root_dir = Path("/Users/roman/dev/gpgpusims")
benchmark_dir = root_dir / "benchmarks"
run_dir = root_dir / "run"
assert benchmark_dir.is_dir()
assert run_dir.is_dir()

In [6]:
configs = parse_configs(benchmark_dir / "configs" / "configs.yml")
benchmarks = parse_benchmarks(benchmark_dir / "benchmarks.yml")

pprint(configs)
pprint(benchmarks)
pprint(gpusims.SIMULATORS)

{'sm6_gtx1080': Config(key='sm6_gtx1080', name='GTX 1080', path=PosixPath('/Users/roman/dev/gpgpusims/benchmarks/configs/SM6_GTX1080'), spec={'sm_count': 20, 'clock_speed': 1607}),
 'sm86_a4000': Config(key='sm86_a4000', name='A4000', path=PosixPath('/Users/roman/dev/gpgpusims/benchmarks/configs/SM86_A4000'), spec={'sm_count': 48, 'clock_speed': 735}),
 'sm86_rtx3070': Config(key='sm86_rtx3070', name='RTX 3070', path=PosixPath('/Users/roman/dev/gpgpusims/benchmarks/configs/SM86_RTX3070'), spec={'sm_count': 46, 'clock_speed': 1132})}
{'babelstream': Benchmark(/Users/roman/dev/gpgpusims/benchmarks/BabelStream),
 'cuda10-matrixmul': Benchmark(/Users/roman/dev/gpgpusims/benchmarks/CUDA10-matrixMul),
 'cuda10-transpose': Benchmark(/Users/roman/dev/gpgpusims/benchmarks/CUDA10-transpose),
 'cuda4-matrixmul': Benchmark(/Users/roman/dev/gpgpusims/benchmarks/CUDA4-matrixMul),
 'cuda6-transpose': Benchmark(/Users/roman/dev/gpgpusims/benchmarks/CUDA6-transpose),
 'vectoradd': Benchmark(/Users/roma

In [7]:
selected_simulators = copy(gpusims.SIMULATORS)
selected_configs = copy(configs)
selected_benchmarks = copy(benchmarks)

# for testing
if True:
    testing_simulators = None
    testing_configs = None
    testing_benchmarks = None
    # testing_simulators = list(gpusims.SIMULATORS.keys())
    # testing_simulators = [gpusims.NATIVE]
    # testing_simulators = [gpusims.MULTI2SIM]
    testing_configs = ["sm6_gtx1080", "sm86_a4000"]
    # testing_benchmarks = ["babelstream"] # "cuda6-transpose"]
    
    if testing_simulators is not None:
        selected_simulators = {k: v for k, v in gpusims.SIMULATORS.items() if k in testing_simulators}
    if testing_configs is not None:
        selected_configs = {k: v for k, v in configs.items() if k in testing_configs}
    if testing_benchmarks is not None:
        selected_benchmarks = {k: v for k, v in benchmarks.items() if k in testing_benchmarks}

pprint(selected_simulators)
pprint(selected_configs)
pprint(selected_benchmarks)

{'accelsim-ptx': <class 'gpusims.accelsim.AccelSimPTXBenchmarkConfig'>,
 'accelsim-sass': <class 'gpusims.accelsim_sass.AccelSimSASSBenchmarkConfig'>,
 'm2s': <class 'gpusims.multi2sim.Multi2SimBenchmarkConfig'>,
 'macsim': <class 'gpusims.macsim.MacSimBenchmarkConfig'>,
 'native': <class 'gpusims.native.NativeBenchmarkConfig'>,
 'tejas': <class 'gpusims.tejas.TejasBenchmarkConfig'>}
{'sm6_gtx1080': Config(key='sm6_gtx1080', name='GTX 1080', path=PosixPath('/Users/roman/dev/gpgpusims/benchmarks/configs/SM6_GTX1080'), spec={'sm_count': 20, 'clock_speed': 1607}),
 'sm86_a4000': Config(key='sm86_a4000', name='A4000', path=PosixPath('/Users/roman/dev/gpgpusims/benchmarks/configs/SM86_A4000'), spec={'sm_count': 48, 'clock_speed': 735})}
{'babelstream': Benchmark(/Users/roman/dev/gpgpusims/benchmarks/BabelStream),
 'cuda10-matrixmul': Benchmark(/Users/roman/dev/gpgpusims/benchmarks/CUDA10-matrixMul),
 'cuda10-transpose': Benchmark(/Users/roman/dev/gpgpusims/benchmarks/CUDA10-transpose),
 'cu

In [8]:
colors = ["#5F34FA", "#49DFE3", "#8CFA5D", "#E3BC49", "#FF7357", "#EE34FA"]
sim_color = {
    "GPUTejas": "#7E61FA", # colors[0],
    "AccelSim PTX": colors[1],
    "AccelSim SASS": colors[2],
    "Hardware": "#FF3C1E", # colors[3],
    "Multi2Sim": colors[4],
    "MacSim": colors[5],
}
abbr = {
    "GPUTejas": "Tejas",
    "AccelSim PTX": "AS PTX",
    "AccelSim SASS": "AS SASS",
    "Hardware": "HW",
    "Multi2Sim": "M2S",
    "MacSim": "MS",
}

margin = 50
default_layout_options = dict(
    plot_bgcolor="white",
    margin=dict(
        pad=10,
        autoexpand=True,
        l=margin, r=margin, t=1.5*margin, b=margin
    ),
    width=900,
    height=500,
)

def hex_to_rgb(hex_color):
    hex_color = hex_color.lstrip("#")
    if len(hex_color) == 3:
        hex_color = hex_color * 2
    return int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16)

In [9]:
def plot_scatter(
    data, config, metric_cls,
    title=None, fontsize=14, font_family="Helvetica", round_to=2
):
    traces = []
    data = data.set_index(["Simulator"])
    data = data.sort_values(by="Benchmark")
    data = data.sort_index()
    simulators = data.index.get_level_values("Simulator").unique().tolist()
    all_data = []
    
    for sim in simulators:
        if sim == "Hardware":
            continue
        
        sim_values = data.loc[data.index == sim] # .reset_index()
        sim_values = sim_values.rename(columns={"Value": "SimValue"})
        
        hw_values = data.loc[data.index == "Hardware"] # .reset_index()
        hw_values = hw_values.rename(columns={"Value": "HwValue"})
        values = sim_values.merge(hw_values, on="Benchmark")
        
        hw_values = values["HwValue"].to_numpy()
        sim_values = values["SimValue"].to_numpy()
            
        # print(sim, "hw", hw_values)
        # print(sim, "sim", sim_values)
        all_data.append(values)
        
        if sim_values.sum() > 0:
            correl_co = np.corrcoef(hw_values, sim_values)[0][1]
        else:
            correl_co = 0
        errs = sim_values - hw_values
        
        rel_errs = np.absolute(errs) / (hw_values+0.0000001)
        assert rel_errs.shape == errs.shape
        
        assert len(errs) > 0
        mean_total_err = np.absolute(rel_errs).sum() / len(errs)
        
        assert hw_values.sum() > 0
        mean_total_agg_err = np.absolute(errs).sum() / hw_values.sum()
        mae = np.absolute(errs).sum() / len(errs)
        
        assert len(hw_values) > 0
        mse = np.power(errs, 2).sum() / len(hw_values)
        nrmse = np.power(errs, 2).sum() / np.power(hw_values, 2).sum()

        traces.append(go.Scatter(
            x = values["HwValue"],
            y = values["SimValue"],
            hovertext = values["Benchmark"],
            mode = 'markers',
            marker = dict(
                size = 10,
                color = "rgba(%d, %d, %d, %f)" % (*hex_to_rgb(sim_color[sim]), 0.7),
                symbol = "x",
            ),
            #error_x=dict(
            #    type='data',
            #    symmetric=False,
            #    array=hw_error,
            #    arrayminus=hw_error_min,
            #    visible=True
            #),
            name="{}<br>Corr={:.3f}, Err={:.1f}%, NRMSE={:.1f}%<br>".format(
                sim, correl_co, mean_total_err*100, nrmse*100),
        ))

    all_data = pd.concat(all_data, axis=0)
    all_values = all_data[["HwValue", "SimValue"]]
    min_ax_val = all_values.min().min()
    max_ax_val = all_values.max().max()
    
    pad = 0.1
    real_x_range = [min_ax_val, max_ax_val]
    
    typ = "log"
    # typ = "linear"
    if typ == "log":
        # closest power of 10
        if min_ax_val > 0:
            x_range_min = np.log10(min_ax_val)
        else:
            x_range_min = min_ax_val
        assert max_ax_val > 0
        x_range = [np.floor(x_range_min), np.log10(max_ax_val) * (1+pad)]
    else:
        x_range = [min_ax_val, max_ax_val * (1+pad)]
    
    print(x_range)
    
    gridcolor = "rgba(128,128,128,.4)"
    annotations = []
    if False:
        annotations = [
            go.layout.Annotation(
                text=sim,
                xref='paper', # use paper coordinates
                yref='paper', # use paper coordinates
                x=0.01 + i * 0.15, # position in norm. coord
                y=1.20, # position in norm. coord  
                xanchor="center",
                font=go.layout.annotation.Font(
                    size=fontsize,
                    color='Black'
                ),
                showarrow=False,
                bgcolor=sim_color[sim],
                # bordercolor='#FFFFFF',
                borderwidth=0,
                borderpad=6,
            ) for i, sim in enumerate(simulators)
        ]
    layout = go.Layout(
        title=dict(
            text=title,
            x=0.5,
            xanchor="center",
            yanchor="top",
        ),
        font_family="Helvetica",
        font_color="black",
        font_size=16,
        xaxis=dict(
            title=f"Hardware {metric_cls.name}",
            gridcolor=gridcolor,
            zerolinecolor=gridcolor,
            type=typ,
            range=x_range,
        ),
        yaxis=dict(
            title=f"Simulation {metric_cls.name}",
            gridcolor=gridcolor,
            zerolinecolor=gridcolor,
            type=typ,
            range=x_range,
        ),
        annotations=annotations,
        **default_layout_options,
    )
    
    if True:
        xyrange = np.array([0, layout.xaxis.range[1]])
        if typ == "log":
            xyrange = np.power(xyrange, 10)
        xyline = go.Scatter(
            x=xyrange,
            y=xyrange,
            showlegend=False,
            mode="lines",
            line=dict(
                # color='rgba(255,0,0,.7)',
                color="black",
            ),
        )
        traces.append(xyline)
        
    fig = go.Figure(data=traces, layout=layout)
    return fig

In [10]:
# => per config, benchmark and input, plot bars for each simulator

metrics = {
    gpusims.plot.metrics.Cycles: plot_scatter,
    gpusims.plot.metrics.L2ReadHit: plot_scatter,
    gpusims.plot.metrics.DRAMReads: plot_scatter,
    gpusims.plot.metrics.DRAMWrites: plot_scatter,
    gpusims.plot.metrics.IPC: plot_scatter,
    gpusims.plot.metrics.InstructionCount: plot_scatter,
}

for metric_cls, metrics_plot_func in metrics.items():
    for (config_name, config) in selected_configs.items():
        all_metric_df = []
        for (bench_name, bench) in selected_benchmarks.items():
            for inp in bench.inputs:
                print(config_name, bench_name, inp)
                
                plot_data = PlotData(benchmark=bench, config=config, inp=inp)
                for (sim_name, sim) in selected_simulators.items():
                    if not bench.enabled(sim_name):
                        continue
                    # print(sim_name, config_name, bench_name)
                    bench_config = sim(
                        run_dir=run_dir / sim_name.lower(),
                        benchmark=bench,
                        config=config,
                    )
                    if not bench_config.input_path(inp).is_dir():
                        print(f"WARN: {bench_config.input_path(inp)} does not exist")
                        continue

                    plot_data[sim_name] = bench_config.load_dataframe(inp)

                metric = metric_cls(plot_data)
                metric_df = metric.compute()
                metric_df["Benchmark"] = f"{bench.name}<br>{inp.args}"

                all_metric_df.append(metric_df)

        all_metric_df = pd.concat(all_metric_df)
        # break
        
        fig = metrics_plot_func(
            data=all_metric_df,
            config=config,
            metric_cls=metric_cls,
            title=f"{metric_cls.name} Correlation ({config.name})",
        )
        # fig.show()
        filename = ["scatter", metric_cls.name, config.key]
        filename = Path("./figs") / gpusims.utils.slugify("_".join(filename))
        filename = filename.with_suffix(".pdf")
        fig.write_image(filename, format='pdf')
        print("wrote", filename)

# all_metric_df

sm6_gtx1080 vectoradd Input(vectorAdd 1000)
sm6_gtx1080 vectoradd Input(vectorAdd 1000000)
sm6_gtx1080 cuda10-transpose Input(transpose -repeat=1 -dimX=32 -dimY=32)
sm6_gtx1080 cuda10-transpose Input(transpose -repeat=3 -dimX=32 -dimY=32)
sm6_gtx1080 cuda10-transpose Input(transpose -repeat=1 -dimX=64 -dimY=64)
sm6_gtx1080 cuda10-transpose Input(transpose -repeat=1 -dimX=128 -dimY=128)
sm6_gtx1080 cuda6-transpose Input(transpose -repeat=1 -dimX=32 -dimY=32)
sm6_gtx1080 cuda6-transpose Input(transpose -repeat=3 -dimX=32 -dimY=32)
sm6_gtx1080 cuda6-transpose Input(transpose -repeat=1 -dimX=64 -dimY=64)
sm6_gtx1080 cuda6-transpose Input(transpose -repeat=1 -dimX=128 -dimY=128)
sm6_gtx1080 cuda10-matrixmul Input(matrixMul -wA=32 -hA=32 -wB=32 -hB=32)
sm6_gtx1080 cuda10-matrixmul Input(matrixMul -wA=32 -hA=64 -wB=64 -hB=32)
sm6_gtx1080 cuda10-matrixmul Input(matrixMul -wA=128 -hA=128 -wB=128 -hB=128)
sm6_gtx1080 cuda10-matrixmul Input(matrixMul -wA=512 -hA=512 -wB=512 -hB=512)
sm6_gtx1080 c

FileNotFoundError: [Errno 2] No such file or directory: '/Users/roman/dev/gpgpusims/run/tejas/sm6_gtx1080/cuda4-matrixmul/input-512/results/stats.csv'