In [1]:
import json
import yaml
import pandas as pd
import numpy as np
from pprint import pprint
from pathlib import Path

In [2]:
# load the materialized benchmark config
benchmark_file = "../test-apps/test-apps-materialized.yml"
with open(benchmark_file, "rb") as f:
    benchmarks = yaml.safe_load(f)

benchmarks = benchmarks["benchmarks"]
benchmark_names = list(benchmarks.keys())
pprint(benchmark_names)

['vectorAdd', 'simple_matrixmul', 'matrixmul', 'transpose']


In [3]:
# define targets to use
targets = {
    "accelsim_simulate": "",
    "simulate": "",
    "playground_simulate": "",
    "profile": "",
    "simulate": "",
    "trace": "",
}

In [5]:
# check all benchmark configs for vectoradd
vectoradd = benchmarks["vectorAdd"]
pprint([(b["input_idx"], b["executable"], b["args"]) for b in vectoradd])
pprint(vectoradd[0])

[(0, '/home/roman/dev/box/test-apps/vectoradd/vectoradd', ['100', '32']),
 (1, '/home/roman/dev/box/test-apps/vectoradd/vectoradd', ['1000', '32']),
 (2, '/home/roman/dev/box/test-apps/vectoradd/vectoradd', ['10000', '32'])]
{'accelsim_simulate': {'concurrency': None,
                       'config': '/home/roman/dev/box/accelsim/gtx1080/gpgpusim.config',
                       'config_dir': '/home/roman/dev/box/accelsim/gtx1080',
                       'enabled': True,
                       'inter_config': '/home/roman/dev/box/accelsim/gtx1080/config_fermi_islip.icnt',
                       'repetitions': 2,
                       'results_dir': '/home/roman/dev/box/results',
                       'stats_dir': '/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-sim',
                       'timeout': None,
                       'trace_config': '/home/roman/dev/box/accelsim/gtx1080/gpgpusim.trace.config'},
 'accelsim_trace': {'concurrency': 1,
            

In [31]:
accesses_df = pd.read_csv(Path(benchmarks["vectorAdd"][0]["simulate"]["stats_dir"]) / "stats.accesses.csv", header=None, names=["access", "count"])
accesses_df[accesses_df["count"] > 0]

Unnamed: 0,access,count
0,GLOBAL_ACC_R,8
1,GLOBAL_ACC_W,4
2,INST_ACC_R,2


In [32]:
sim_df = pd.read_csv(Path(benchmarks["vectorAdd"][0]["simulate"]["stats_dir"]) / "stats.sim.csv", header=0)
sim_df

Unnamed: 0,cycles,instructions
0,80,1324


In [18]:
dram_df = pd.read_csv(Path(benchmarks["vectorAdd"][0]["simulate"]["stats_dir"]) / "stats.dram.csv")
dram_total = dram_df["reads"] + dram_df["writes"]
dram_df[dram_total > 0]

Unnamed: 0,chip_id,bank_id,reads,writes
0,0,0,4,0
1,0,1,4,0


In [19]:
dram_banks_df = pd.read_csv(Path(benchmarks["vectorAdd"][0]["simulate"]["stats_dir"]) / "stats.dram.banks.csv")
dram_banks_total = dram_banks_df["reads"] + dram_banks_df["writes"]
dram_banks_df[dram_banks_total > 0]

Unnamed: 0,core_id,chip_id,bank_id,reads,writes
0,0,0,0,1,0
1,0,0,1,1,0


In [33]:
instructions_df = pd.read_csv(Path(benchmarks["vectorAdd"][0]["simulate"]["stats_dir"]) / "stats.instructions.csv", header=None, names=["memory_space", "write", "count"])
instructions_df[instructions_df["count"] > 0]

Unnamed: 0,memory_space,write,count
0,Global,False,200
1,Global,True,100


In [34]:
l2d_df = pd.read_csv(Path(benchmarks["vectorAdd"][0]["simulate"]["stats_dir"]) / "stats.l2d.csv", header=None, names=["cache_id", "access_type", "status", "count"])
l2d_df[l2d_df["count"] > 0]

Unnamed: 0,cache_id,access_type,status,count
7,0,GLOBAL_ACC_R,MISS,4
51,0,GLOBAL_ACC_W,MISS,2
95,0,INST_ACC_R,MISS,1
128,1,GLOBAL_ACC_R,MISS,4
172,1,GLOBAL_ACC_W,MISS,2
216,1,INST_ACC_R,MISS,1


In [44]:
accel_stats_df = pd.read_csv(Path(benchmarks["vectorAdd"][0]["accelsim_simulate"]["stats_dir"]) / "stats.csv", header=None, names=["kernel", "kernel_id", "stat", "value"])
accel_stats_df

FileNotFoundError: [Errno 2] No such file or directory: '/home/roman/dev/box/results/vectorAdd/vectorAdd-dtype-32-length-100/accelsim-sim/stats.csv'

In [43]:
native_df = pd.read_json(Path(benchmarks["vectorAdd"][0]["profile"]["profile_dir"]) / "profile.metrics.json").T
native_df

Unnamed: 0,value,unit
Device,NVIDIA GeForce GTX 1080 (0),
Context,1.0,
Stream,7.0,
Kernel,_Z6vecAddIfEvPT_S1_S1_i,
Correlation_ID,1.0,
...,...,...
dram_write_transactions,12.0,
dram_read_throughput,1.316697,GB/s
dram_write_throughput,197.31192,MB/s
dram_write_bytes,384.0,


In [41]:
with open(Path(benchmarks["vectorAdd"][0]["profile"]["profile_dir"]) / "profile.commands.json", "rb") as f:
    commands_json = json.load(f)
# print(commands_json)
native_commands_df = pd.DataFrame.from_dict([{k: v["value"] for k, v in e.items()} for e in commands_json])
if True:
    # , header=None, names=["kernel", "kernel_id", "stat", "value"])
    native_commands_df = native_commands_df[~native_commands_df["Correlation_ID"].isnull()]
    # remove memcopies
    native_commands_df = native_commands_df[~native_commands_df["Name"].str.contains(r"\[CUDA memcpy .*\]")]
    # name refers to kernels now
    native_commands_df = native_commands_df.rename(columns={"Name": "Kernel"})
    # remove columns that are only relevant for memcopies
    # df = df.loc[:,df.notna().any(axis=0)]
    native_commands_df = native_commands_df.drop(columns=["Size", "Throughput", "SrcMemType", "DstMemType"])
    # set the correct dtypes
    native_commands_df = native_commands_df.astype({
        "Start": "float64",
        "Duration": "float64",
        "Static SMem": "float64",
        "Dynamic SMem": "float64",
        "Device": "string",
        "Kernel": "string",
    })
native_commands_df

Unnamed: 0,Start,Duration,Grid X,Grid Y,Grid Z,Block X,Block Y,Block Z,Registers Per Thread,Static SMem,Dynamic SMem,Device,Context,Stream,Kernel,Correlation_ID
3,255434.89,3.264,1.0,1.0,1.0,1024.0,1.0,1.0,8.0,0.0,0.0,NVIDIA GeForce GTX 1080 (0),1,7,_Z6vecAddIfEvPT_S1_S1_i,123


In [35]:
all_accel_stats = list(accel_stats_df["stat"].unique())
 'gpu_tot_sim_cycle',
 'gpu_total_instructions',
 'l2_cache_constant_read_hit',
 'l2_cache_constant_read_hit_reserved',
 'l2_cache_constant_read_miss',
 'l2_cache_constant_read_mshr_hit',
 'l2_cache_constant_read_reservation_fail',
 'l2_cache_constant_read_sector_miss',
 'l2_cache_global_read_hit',
 'l2_cache_global_read_hit_reserved',
 'l2_cache_global_read_miss',
 'l2_cache_global_read_mshr_hit',
 'l2_cache_global_read_reservation_fail',
 'l2_cache_global_read_sector_miss',
 'l2_cache_global_read_total',
 'l2_cache_global_write_hit',
 'l2_cache_global_write_hit_reserved',
 'l2_cache_global_write_miss',
 'l2_cache_global_write_mshr_hit',
 'l2_cache_global_write_reservation_fail',
 'l2_cache_global_write_sector_miss',
 'l2_cache_global_write_total',
 'l2_cache_inst_read_hit',
 'l2_cache_inst_read_hit_reserved',
 'l2_cache_inst_read_miss',
 'l2_cache_inst_read_mshr_hit',
 'l2_cache_inst_read_reservation_fail',
 'l2_cache_inst_read_sector_miss',
 'l2_cache_l1_write_alloc_read_hit',
 'l2_cache_l1_write_alloc_read_hit_reserved',
 'l2_cache_l1_write_alloc_read_miss',
 'l2_cache_l1_write_alloc_read_mshr_hit',
 'l2_cache_l1_write_alloc_read_reservation_fail',
 'l2_cache_l1_write_alloc_read_sector_miss',
 'l2_cache_l1_writeback_hit',
 'l2_cache_l1_writeback_hit_reserved',
 'l2_cache_l1_writeback_miss',
 'l2_cache_l1_writeback_mshr_hit',
 'l2_cache_l1_writeback_reservation_fail',
 'l2_cache_l1_writeback_sector_miss',
 'l2_cache_l2_write_alloc_read_hit',
 'l2_cache_l2_write_alloc_read_hit_reserved',
 'l2_cache_l2_write_alloc_read_miss',
 'l2_cache_l2_write_alloc_read_mshr_hit',
 'l2_cache_l2_write_alloc_read_reservation_fail',
 'l2_cache_l2_write_alloc_read_sector_miss',
 'l2_cache_l2_writeback_hit',
 'l2_cache_l2_writeback_hit_reserved',
 'l2_cache_l2_writeback_miss',
 'l2_cache_l2_writeback_mshr_hit',
 'l2_cache_l2_writeback_reservation_fail',
 'l2_cache_l2_writeback_sector_miss',
 'l2_cache_local_read_hit',
 'l2_cache_local_read_hit_reserved',
 'l2_cache_local_read_miss',
 'l2_cache_local_read_mshr_hit',
 'l2_cache_local_read_reservation_fail',
 'l2_cache_local_read_sector_miss',
 'l2_cache_local_write_hit',
 'l2_cache_local_write_hit_reserved',
 'l2_cache_local_write_miss',
 'l2_cache_local_write_mshr_hit',
 'l2_cache_local_write_reservation_fail',
 'l2_cache_local_write_sector_miss',
 'l2_cache_texture_read_hit',
 'l2_cache_texture_read_hit_reserved',
 'l2_cache_texture_read_miss',
 'l2_cache_texture_read_mshr_hit',
 'l2_cache_texture_read_reservation_fail',
 'l2_cache_texture_read_sector_miss',
 'num_dram_full_stalls',
 'num_global_mem_read',
 'num_global_mem_write',
 'num_interconn_to_shared_mem_stalls',
 'num_intra_warp_mshr_merge',
 'num_issued_blocks',
 'num_load_inst',
 'num_local_mem_read',
 'num_local_mem_write',
 'num_param_mem_inst',
 'num_register_set_bank_conflict_stalls',
 'num_shared_mem_bank_conflicts',
 'num_shared_mem_inst',
 'num_shared_mem_stalls',
 'num_star_inst',
 'num_store_inst',
 'num_tex_inst',
 'num_tex_mem_total_accesses'

['gpgpu_silicon_slowdown',
 'gpgpu_simulation_rate',
 'gpgpu_simulation_time_sec',
 'gpu_ipc',
 'gpu_occupancy',
 'gpu_tot_ipc',
 'gpu_tot_sim_cycle',
 'gpu_total_instructions',
 'kernel_launch_uid',
 'l1_const_cache_total_accesses',
 'l1_const_cache_total_misses',
 'l1_const_cache_total_pending_hits',
 'l1_const_cache_total_reservation_fails',
 'l1_data_cache_data_port_utilization',
 'l1_data_cache_fill_port_utilization',
 'l1_data_cache_total_accesses',
 'l1_data_cache_total_misses',
 'l1_data_cache_total_pending_hits',
 'l1_data_cache_total_reservation_fails',
 'l1_inst_cache_total_accesses',
 'l1_inst_cache_total_miss_rate',
 'l1_inst_cache_total_misses',
 'l1_inst_cache_total_pending_hits',
 'l1_inst_cache_total_reservation_fails',
 'l1_tex_cache_total_accesses',
 'l1_tex_cache_total_misses',
 'l1_tex_cache_total_pending_hits',
 'l1_tex_cache_total_reservation_fails',
 'l2_bandwidth_gbps',
 'l2_cache_constant_read_hit',
 'l2_cache_constant_read_hit_reserved',
 'l2_cache_constant_r

In [5]:
with open(Path(benchmarks["vectorAdd"][0]["simulate"]["stats_dir"]) / "stats.json", "rb") as f:
    stats_json = json.load(f)
pprint(stats_json.keys())

dict_keys(['accesses', 'instructions', 'sim', 'dram', 'l1i_stats', 'l1c_stats', 'l1t_stats', 'l1d_stats', 'l2d_stats'])


In [10]:
# plot execution times
for bench_config in [b for b in benchmarks["vectorAdd"] if b["input_idx"] == 2]:
    # pprint(bench_config)
    
    stats_dir = Path(bench_config["accelsim_simulate"]["stats_dir"])
    with open(stats_dir / "exec_time.json", "rb") as f:
        exec_time = json.load(f)
    print(exec_time)

    stats_dir = Path(bench_config["playground_simulate"]["stats_dir"])
    with open(stats_dir / "exec_time.json", "rb") as f:
        exec_time = json.load(f)
    print(exec_time)

    stats_dir = Path(bench_config["simulate"]["stats_dir"])
    with open(stats_dir / "exec_time.json", "rb") as f:
        exec_time = json.load(f)
    print(exec_time)

    # stats_df = pd.read_json(stats_dir / "stats.json", orient='index')
    with open(stats_dir / "stats.json", "rb") as f:
        stats_json = json.load(f)
    # accesses_df = pd.from_dict(stats_json["accesses"])
    # pprint(stats_json)
    pprint(stats_json["accesses"])
    accesses_df = pd.DataFrame.from_dict(stats_json["accesses"], orient='index')
    pprint(stats_json["sim"])
    sim_df = pd.DataFrame.from_dict(stats_json["sim"], orient='index')
    
    def df_from_nested_dict(d):
        return pd.concat({k: pd.DataFrame(v).T for k, v in d.items()}, axis=0)

    pprint(stats_json["dram"])
    # dram_df = pd.concat({k: df_from_nested_dict(v) for k, v in stats_json["dram"].items()}, axis=0)
    dram_per_bank_df["bank_writes"] = pd.concat({k: df_from_nested_dict(v) for k, v in stats_json["dram"]["bank_writes"].items()}, axis=0)

    if False:
        dram_df = pd.DataFrame()
        dram_stats = list(stats_json["dram"].items())
        pprint(dram_stats)
        for i in  range(len(dram_stats)):
            for j in range(len(dram_stats[i])):
                print(dram_stats[i][j])
                dram_df = pd.concat([dram_df , pd.DataFrame(dram_stats[i][j])]).reset_index(drop=True)
    
# stats_df
accesses_df
sim_df
dram_df

924
127
115
{'GLOBAL_ACC_R': 626, 'GLOBAL_ACC_W': 313, 'INST_ACC_R': 2}
{'cycles': 1911, 'instructions': 40240}
{'bank_reads': [[[65,
                  65,
                  63,
                  62,
                  64,
                  64,
                  63,
                  62,
                  64,
                  64,
                  57,
                  56,
                  48,
                  48,
                  48,
                  48]]],
 'bank_writes': [[[16,
                   16,
                   16,
                   16,
                   16,
                   16,
                   20,
                   20,
                   32,
                   32,
                   25,
                   24,
                   16,
                   16,
                   16,
                   16]]],
 'total_bank_reads': [[260,
                       260,
                       252,
                       248,
                       256,
                      

AttributeError: 'list' object has no attribute 'items'

In [None]:
# test iterating over the benchmarks
for bench