## Exploring the gem5 stats


In [1]:
% matplotlib inline
% load_ext autoreload
% autoreload 2

## Parsing

In [87]:
import glob

from autorocks.dir_struct import RootDir

exp_name = "fft_transpose"  # "stencil_stencil3d"  # aes_workload
exp_date = "2021_09_06_12_13"  # "2021_09_01_01_47" #
model = "BoTorch"
iteration = 1
exp_dir = RootDir.parents[1] / f"local_execution/gem5/{exp_name}/20_params"
specific_file = exp_dir / f"100_iter/{model}/{exp_date}/{iteration}/env_output"

## Cache stats parser

In [88]:
# specific_file can be replaced with exp_dir
cache_stats_files = glob.glob(str(specific_file / "**" / "*cache_stats.txt"), recursive=True)

# this can become a loop
cache_stats_file = cache_stats_files[0]

In [89]:
import re

FLOAT_NUM_PARSER = r"(\d+.?\d*[e\+[0-9]+]?)"  # TODO: move to the constants

parser = re.compile(r"(\S+) (\d+.?\d*[e\+[0-9]+]?)")

# TODO: create dictionary to hold the cache values
with open(cache_stats_file, "r") as f:
    f_contents = f.read()

matched_res = parser.findall(f_contents)
for res in matched_res:
    key, val = res
    print(f"{key}: {float(val)}")

system.datapath.dcache.average_pwr: 682.69
system.datapath.dcache.dynamic_pwr: 177.689
system.datapath.dcache.leakage_pwr: 505.0
system.datapath.dcache.area: 479679000.0
system.datapath.tlb.average_pwr: 1.13028
system.datapath.tlb.dynamic_pwr: 0.0961073
system.datapath.tlb.leakage_pwr: 1.03418
system.datapath.tlb.area: 157000.0


## Stats file parser

https://www.gem5.org/documentation/general_docs/statistics/api

Contains few things:


Histogram:

the smallest/largest value being sampled
the number of values that are smaller/larger than the specified minimum and maximum
the sum of all samples
the mean, the geometric mean and the standard deviation of the samples
histogram within the range of [min, max] splitted into (max-min)/bucket_size equally sized buckets, where the min/max/bucket_size are inputs to the init() function.


In [90]:
# specific_file can be replaced with exp_dir
stats_files = glob.glob(str(specific_file / "**" / "stats.txt"), recursive=True)

# this can become a loop
stats_file = stats_files[0]

with open(stats_file, "r") as f:
    stats_contents = f.read()

In [91]:
from re import RegexFlag

FLOAT_NUM_PARSER = r"(\d+.?\d*[e\+[0-9]+]?)"  # TODO: move to the constants

system_parser = re.compile(r"^(\w+)\s+" + FLOAT_NUM_PARSER, RegexFlag.MULTILINE)

system_metrics = system_parser.findall(stats_contents)
for m in system_metrics:
    key, val = m
    print(f"{key}: {float(val)}")

# TODO: figure out how to combine the system metrics.
# It is not just sums

final_tick: 6488518956.0
host_inst_rate: 97986.0
host_mem_usage: 5926276.0
host_op_rate: 218178.0
host_seconds: 13.01
host_tick_rate: 498649019.0
sim_freq: 1000000000000.0
sim_insts: 1275009.0
sim_ops: 2838973.0
sim_seconds: 0.006489
sim_ticks: 6488518956.0
final_tick: 27393465312.0
host_inst_rate: 163034.0
host_mem_usage: 5926276.0
host_op_rate: 330471.0
host_seconds: 32.75
host_tick_rate: 638314861.0
sim_freq: 1000000000000.0
sim_insts: 5339384.0
sim_ops: 10822981.0
sim_seconds: 0.020905
sim_ticks: 20904946356.0


In [92]:
val_re = r"([\d+.?\d*[e\+[0-9]+]?|nan|inf)"
dis_func_re = r"::(\D\S+)"  # ignore buckets that starts with numbers but allow numbers in middle (for example Sha256
key_re = r"^(\S+)"

histo_parser_re = rf"{key_re}{dis_func_re}\s+{val_re}"
print(f"parser regex: {histo_parser_re}")
histo_parser = re.compile(histo_parser_re, RegexFlag.MULTILINE)
histo_metrics = histo_parser.findall(stats_contents)
for m in histo_metrics:
    key, func, val = m
    print(f"{func}, {key}: {float(val)}")

parser regex: ^(\S+)::(\D\S+)\s+([\d+.?\d*[e\+[0-9]+]?|nan|inf)
samples, system.cpu.commit.committed_per_cycle: 1000302.0
mean, system.cpu.commit.committed_per_cycle: 2.838116
stdev, system.cpu.commit.committed_per_cycle: 2.618819
underflows, system.cpu.commit.committed_per_cycle: 0.0
overflows, system.cpu.commit.committed_per_cycle: 0.0
min_value, system.cpu.commit.committed_per_cycle: 0.0
max_value, system.cpu.commit.committed_per_cycle: 8.0
total, system.cpu.commit.committed_per_cycle: 1000302.0
No_OpClass, system.cpu.commit.op_class_0: 92321.0
IntAlu, system.cpu.commit.op_class_0: 2276626.0
IntMult, system.cpu.commit.op_class_0: 1358.0
IntDiv, system.cpu.commit.op_class_0: 35804.0
FloatAdd, system.cpu.commit.op_class_0: 2060.0
FloatCmp, system.cpu.commit.op_class_0: 0.0
FloatCvt, system.cpu.commit.op_class_0: 0.0
FloatMult, system.cpu.commit.op_class_0: 0.0
FloatMultAcc, system.cpu.commit.op_class_0: 0.0
FloatDiv, system.cpu.commit.op_class_0: 0.0
FloatMisc, system.cpu.commit.op_cl

In [93]:
key_re = r"^(\w+(?:\.\w+)+)"
other_parser = re.compile(rf"{key_re}\s+{val_re}", RegexFlag.MULTILINE)
other_metrics = other_parser.findall(stats_contents)
for m in other_metrics:
    key, val = m
    print(f"{key}: {float(val)}")

system.cpu.branchPred.BTBCorrect: 0.0
system.cpu.branchPred.BTBHitPct: 0.0
system.cpu.branchPred.BTBHits: 0.0
system.cpu.branchPred.BTBLookups: 414276.0
system.cpu.branchPred.RASInCorrect: 0.0
system.cpu.branchPred.condIncorrect: 12247.0
system.cpu.branchPred.condPredicted: 458413.0
system.cpu.branchPred.indirectHits: 298684.0
system.cpu.branchPred.indirectLookups: 414276.0
system.cpu.branchPred.indirectMisses: 115592.0
system.cpu.branchPred.lookups: 489448.0
system.cpu.branchPred.usedRAS: 14701.0
system.cpu.branchPredindirectMispredicted: 4647.0
system.cpu.cc_regfile_reads: 2175323.0
system.cpu.cc_regfile_writes: 1001800.0
system.cpu.commit.amos: 0.0
system.cpu.commit.branchMispredicts: 12260.0
system.cpu.commit.branches: 389727.0
system.cpu.commit.bw_lim_events: 142129.0
system.cpu.commit.commitNonSpecStalls: 18.0
system.cpu.commit.commitSquashedInsts: 346016.0
system.cpu.commit.committedInsts: 1275009.0
system.cpu.commit.committedOps: 2838973.0
system.cpu.commit.fp_insts: 58537.0
sy

In [94]:
with open(stats_file, "r") as f:
    print(len(f.readlines()))

2647


In [95]:
full_hist_len = len(histo_metrics)
system_len = len(system_metrics)
others_len = len(other_metrics)

Missing stuff

Validate the parsers didn't miss out on anything important

In [96]:
with open(stats_file, "r") as f:
    content_lines = f.readlines()
for line in content_lines:
    if system_parser.findall(line):
        continue
    if other_parser.findall(line):
        continue
    if histo_parser.findall(line):
        continue
    print(line)



---------- Begin Simulation Statistics ----------

# Stats desc: fft_transpose_datapath completed.

system.cpu.commit.committed_per_cycle::0       187845     18.78%     18.78% # Number of insts commited each cycle

system.cpu.commit.committed_per_cycle::1       224279     22.42%     41.20% # Number of insts commited each cycle

system.cpu.commit.committed_per_cycle::2       120181     12.01%     53.21% # Number of insts commited each cycle

system.cpu.commit.committed_per_cycle::3       180957     18.09%     71.30% # Number of insts commited each cycle

system.cpu.commit.committed_per_cycle::4        83629      8.36%     79.67% # Number of insts commited each cycle

system.cpu.commit.committed_per_cycle::5        22325      2.23%     81.90% # Number of insts commited each cycle

system.cpu.commit.committed_per_cycle::6        24411      2.44%     84.34% # Number of insts commited each cycle

system.cpu.commit.committed_per_cycle::7        14546      1.45%     85.79% # Number of insts

## Creating DF

* Split the read file into X number of simulations.
* Parse the one with the highest ticks
* Return DF with three columns: System, Stats, Others

In [97]:
import pandas as pd

simulations = stats_contents.split("\n---------- Begin Simulation Statistics ----------\n")

all_system = []
all_statistics = []
all_other = []

for order, sim in enumerate(simulations):
    if not sim:
        continue
    sim_df = pd.DataFrame()

    system_metrics = system_parser.findall(sim)
    system_df = pd.DataFrame(system_metrics).set_index(0).T.astype(float)

    histo_metrics = histo_parser.findall(sim)
    statistics_df = pd.DataFrame(histo_metrics).set_index([0, 1]).T.astype(float)

    other_metrics = other_parser.findall(sim)
    other_df = pd.DataFrame(other_metrics).set_index(0).T.astype(float)

    system_df["simulation_order"] = order
    statistics_df["simulation_order"] = order
    other_df["simulation_order"] = order

    all_system.append(system_df)
    all_statistics.append(statistics_df)
    all_other.append(other_df)

all_system_df = pd.concat(all_system)
all_statistics_df = pd.concat(all_statistics)
all_other_df = pd.concat(all_other)

In [98]:
# choose the one with highest sim_ticks
all_system_df.iloc[all_system_df.sim_ticks.argmax()]

0
final_tick          2.739347e+10
host_inst_rate      1.630340e+05
host_mem_usage      5.926276e+06
host_op_rate        3.304710e+05
host_seconds        3.275000e+01
host_tick_rate      6.383149e+08
sim_freq            1.000000e+12
sim_insts           5.339384e+06
sim_ops             1.082298e+07
sim_seconds         2.090500e-02
sim_ticks           2.090495e+10
simulation_order    2.000000e+00
Name: 1, dtype: float64

## Viz

Correlate the results to performance

In [99]:
int(all_system_df.iloc[all_system_df.sim_ticks.argmax()].simulation_order)

2

In [100]:
all_other_df

Unnamed: 0,system.cpu.branchPred.BTBCorrect,system.cpu.branchPred.BTBHitPct,system.cpu.branchPred.BTBHits,system.cpu.branchPred.BTBLookups,system.cpu.branchPred.RASInCorrect,system.cpu.branchPred.condIncorrect,system.cpu.branchPred.condPredicted,system.cpu.branchPred.indirectHits,system.cpu.branchPred.indirectLookups,system.cpu.branchPred.indirectMisses,...,system.cpu.dcache.replacements,system.cpu.dcache.tags.tagsinuse,system.cpu.dcache.tags.total_refs,system.cpu.dcache.tags.sampled_refs,system.cpu.dcache.tags.avg_refs,system.cpu.dcache.tags.warmup_cycle,system.cpu.dcache.tags.tag_accesses,system.cpu.dcache.tags.data_accesses,simulation_order,system.cpu.quiesceCycles
1,0.0,0.0,0.0,414276.0,0.0,12247.0,458413.0,298684.0,414276.0,115592.0,...,149.0,878.237669,8694.0,149.0,58.348993,349392.0,812611.0,812611.0,1,
1,0.0,0.0,0.0,1064399.0,1.0,46616.0,1153972.0,634983.0,1064399.0,429416.0,...,666.0,1568.793123,1764415.0,2609.0,676.280184,0.0,2733453.0,2733453.0,2,27247.0


In [106]:
from autorocks.envs.gem5.parsers.stats_parser import parse_statistics

res = parse_statistics(stats_file)

In [109]:
res.histograms.T

Unnamed: 0_level_0,Unnamed: 1_level_0,2
0,1,Unnamed: 2_level_1
system.cpu.commit.committed_per_cycle,samples,3.265466e+06
system.cpu.commit.committed_per_cycle,mean,2.444983e+00
system.cpu.commit.committed_per_cycle,stdev,2.561176e+00
system.cpu.commit.committed_per_cycle,underflows,0.000000e+00
system.cpu.commit.committed_per_cycle,overflows,0.000000e+00
...,...,...
system.cpu.dcache.tags.occ_blocks,.cpu.data,6.562207e+02
system.cpu.dcache.tags.occ_blocks,.cpu.dcache.prefetcher,9.125724e+02
system.cpu.dcache.tags.occ_percent,.cpu.data,3.204200e-01
system.cpu.dcache.tags.occ_percent,.cpu.dcache.prefetcher,4.455920e-01


# Parse results of all experiments