In [1]:

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
from autorocks.data.loader.all_models_result_aggregator import create_all_models_comparison_dataset

# Viz cell

from autorocks.dir_struct import RootDir
from autorocks.envs.gem5.benchmarks.benchmark_tasks import MachSuiteTask

# --- Figure export settings -------------------------------------------------
# Previously used destination, kept for reference:
# output_location = "/Users/salabed/workspace/latex_writings/papers/mlsys21_autobo/figs"
output_format = "svg"  # alternative: pdf
figs_root = "/home/salabed/workspace/latex/papers/osdi21_bograph/figs"
# SVG figures go into a dedicated sub-folder of the figure directory.
output_location = f"{figs_root}/svg" if output_format == "svg" else figs_root
# Figures are only written to disk by the guarded cells when this is True.
save_res = False

# --- Experiment selection and data loading ------------------------------------
exp_name = str(MachSuiteTask.SPMV_ELLPACK)
exp_subpath = f"local_execution/gem5osdi/{exp_name}/20_params/100_iter"
exp_dir = RootDir.parent / exp_subpath
model_comparison_data = create_all_models_comparison_dataset(exp_dir)

In [None]:
import autorocks.viz.viz as viz

# Normalise model names and assign one colour per model for all later plots.
model_comparison_data_c = viz.unify_model_name(model_comparison_data)
color_palette = viz.create_color_palette(
    model_comparison_data_c, ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
)
# NOTE(review): "latency" here is the reciprocal of the simulated seconds —
# confirm this is the intended definition before interpreting EDP values.
latency = 1 / model_comparison_data_c.system_performance["detailed_stats.system.sim_seconds"]
power = model_comparison_data_c.system_performance["bench_stats.avg_power"]
# Power-delay product; kept for interactive inspection.
pdp = power * latency
# pdp = self.avg_power * self.cycle
import numpy as np

# log(EDP) = log(latency^2 * power). The previous `np.log(latency * pdp.values)`
# assignment was overwritten by this line on the next statement, so it is dropped.
edp = np.log(np.power(latency, 2) * power)
# pdp
model_comparison_data_c.system_performance["bench_stats.edp"] = edp

In [None]:
# Plot the "inference_time" column of the model-performance table per model.
fig = viz.model_perf_plot(
    comparison_col="inference_time",
    model_palette_map=color_palette,
    model_perf_df=model_comparison_data_c.model_performance,
)

if save_res:
    exetime_fig_path = f"{output_location}/{exp_name}_exetime.{output_format}"
    fig.savefig(exetime_fig_path, format=f"{output_format}", bbox_inches="tight", dpi=600)

fig

In [None]:
# Box plot of the log(EDP) column computed above, one box per model.
boxplot_kwargs = dict(
    perf_df=model_comparison_data_c.system_performance,
    optimization_type=viz.OptimizationType.MINIMIZE,
    ylabel="log(EDP(x))",
    comparison_col="bench_stats.edp",
    model_palette_map=color_palette,
    # horizontal_line='Default'
)
fig = viz.perf_boxplot(**boxplot_kwargs)
if save_res:
    boxplot_fig_path = f"{output_location}/{exp_name}_reduced_epd_perf.{output_format}"
    fig.savefig(boxplot_fig_path, format=f"{output_format}", bbox_inches="tight", dpi=600)
fig

In [None]:
# Work on a copy so the extra column does not leak into the shared
# system_performance table.
convergence_df = model_comparison_data_c.system_performance.copy()

# Name of the cumulative aggregation, built from the optimisation direction
# (presumably "cummin" for MINIMIZE — verify against viz.OptimizationType).
cumulative_agg = f"cum{str(viz.OptimizationType.MINIMIZE)}"
per_model_run = convergence_df.groupby(["model", "iteration"])
convergence_df["rolling"] = per_model_run.agg(cumulative_agg)["bench_stats.edp"]

In [None]:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

# Min-max scale the "rolling" column into a new "scaled" column.
scaler = MinMaxScaler()
# A Series has no `exe_time` attribute (the old code raised AttributeError);
# the scaler needs a 2-D (n_samples, 1) array, so go through the NumPy values.
rolling_values = convergence_df["rolling"].to_numpy().reshape(-1, 1)
arr_scaled = scaler.fit_transform(rolling_values)
# Flatten back to one dimension and align on the frame's own index.
convergence_df["scaled"] = pd.Series(arr_scaled.ravel(), index=convergence_df["rolling"].index)

In [None]:
# List the distinct model names present in the convergence table.
convergence_df["model"].unique()

In [None]:
# Convergence plot over the "rolling" column, with the "Default" model passed
# as the baseline.
fig = viz.convergence_lintplot_roi(
    df=convergence_df,  # model_comparison_data_c.system_performance,
    optimization_type=viz.OptimizationType.MINIMIZE,
    # ylabel="EDP in LogScale",
    model_baseline="Default",
    # column_name='bench_stats.edp',
    column_name="rolling",
    model_palette_map=color_palette,
)
# Honour the notebook-wide `save_res` switch like the other figure cells do;
# previously this cell always wrote to the hard-coded output directory.
if save_res:
    fig.savefig(
        f"{output_location}/{exp_name}_convergence.{output_format}",
        bbox_inches="tight",
        format=f"{output_format}",
        dpi=600,
    )

fig

# Find all optimized targets against ticks



In [None]:
from notebooks.gem5.statistics_parser import all_models_parser

# Parse the experiment directory and keep only the `system` part of the result.
parsed_models = all_models_parser(exp_dir)
all_system_stuff_df = parsed_models.system

In [None]:
# Display the system-performance table (now including "bench_stats.edp").
model_comparison_data_c.system_performance

In [None]:
# Display the available column names of the system-performance table.
model_comparison_data_c.system_performance.columns

In [None]:
# Columns used for the results table: run identifiers, benchmark statistics
# and simulator statistics.
run_id_cols = ["model", "step", "iteration"]
bench_stat_cols = [
    "bench_stats.cycle",
    "bench_stats.avg_power",
    "bench_stats.total_area",
    "bench_stats.edp",
]
sim_stat_cols = [
    "detailed_stats.system.sim_ticks",
    "detailed_stats.system.sim_seconds",
    "detailed_stats.system.sim_ops",
]
cols = run_id_cols + bench_stat_cols + sim_stat_cols
tables_res_df = model_comparison_data_c.system_performance[cols].copy()
# convergence_df
# Sample of the simulator statistics file, for reference:
# sim_insts                                       23036                       # Number of instructions simulated
# sim_ops                                         59090                       # Number of ops (including micro ops) simulated
# sim_seconds                                  0.000387                       # Number of seconds simulated
# sim_ticks                                   387118212                       # Number of ticks simulated
#
# Disabled alternative that merged the separately parsed system statistics:
# system_stuff_df = all_system_stuff_df[["model", "step",
#                                        "iteration",
#                                        # "sim_ticks",
#                                        "sim_seconds",
#                                        # "sim_ops"
#                                        ]]
#
# tables_res_df = tables_res_df.merge(system_stuff_df, on = ["model", "step", "iteration"])

In [None]:
# Short, human-readable headers for the results table.
short_column_names = {
    "bench_stats.edp": "EDP",
    "bench_stats.cycle": "Cycles",
    "bench_stats.avg_power": "Power",
    "bench_stats.total_area": "Area",
    "detailed_stats.system.sim_ticks": "Ticks",
    "detailed_stats.system.sim_seconds": "Seconds",
    "detailed_stats.system.sim_ops": "Ops",
}
tables_res_df = tables_res_df.rename(columns=short_column_names)
tables_res_df

In [None]:
# For every (model, iteration) pair keep the row with the lowest EDP, then
# derive the per-op and scaled columns reported in the table.
best_row_labels = tables_res_df.groupby(["model", "iteration"])["EDP"].idxmin()
best_found = (
    tables_res_df.loc[best_row_labels]
    .reset_index(drop=True)
    .assign(
        **{
            "Latency/Op": lambda df: (df["Cycles"] / df["Ops"]) * 1e6,
            "Power/Op": lambda df: df["Power"] / df["Ops"] * 1e6,
            "Area": lambda df: df["Area"] / 1e6,
            "Latency": lambda df: 1 / df["Seconds"],
        }
    )
    .rename(columns={"Area": "Area(mm^2)"})
    # best_found["Power/Seconds"] = best_found['Power'] / best_found['Seconds'] * 1e+6
    .drop(columns=["Ops", "step", "Ticks", "iteration"])
)
best_found
# power in mW

In [None]:
# Median of every remaining column per model.
per_model = best_found.groupby("model")
groupped_df = per_model.median()
groupped_df

In [None]:
# Emit the per-model summary as a LaTeX table, lowest EDP first.
latex_columns = [
    "model",
    "EDP",
    "Latency",
    "Power",
    # "Cycles",
    "Power/Op",
    "Area(mm^2)",
]
summary_by_edp = groupped_df.reset_index().sort_values(by="EDP")
latex_table = summary_by_edp.to_latex(index=False, float_format="%.2f", columns=latex_columns)
print(latex_table)

In [None]:
# Line plot over the "rolling" column with `convergence_plot` disabled.
fig = viz.convergence_lineplot(
    convergence_df=convergence_df,  # model_comparison_data_c.system_performance,
    optimization_type=viz.OptimizationType.MINIMIZE,
    ylabel="Log(EDP(x))",
    # column_name='bench_stats.edp',
    column_name="rolling",
    convergence_plot=False,
    model_palette_map=color_palette,
)
# Honour the notebook-wide `save_res` switch like the other figure cells do;
# previously this cell always wrote to the hard-coded output directory.
if save_res:
    fig.savefig(
        f"{output_location}/{exp_name}_convergence_reduced.{output_format}",
        bbox_inches="tight",
        format=f"{output_format}",
        dpi=600,
    )

fig


# All tasks combined in one graph


In [None]:
# Accumulator of per-task result frames; re-running this cell resets it.
task_name_to_res = []

In [None]:
# Aggregate log(EDP) per (model, iteration) using the aggregation named by the
# optimisation direction, and show the result.
edp_columns = ["bench_stats.edp", "iteration", "model"]
best_value_agg = str(viz.OptimizationType.MINIMIZE)

edp_by_run = model_comparison_data_c.system_performance[edp_columns].groupby(["model", "iteration"])
perf_df = edp_by_run.agg(best_value_agg).reset_index()

perf_df

In [None]:
# Aggregate log(EDP) per (model, iteration), tag the rows with the current
# task name and add the frame to the cross-task accumulator.
perf_df = model_comparison_data_c.system_performance
perf_df = (
    perf_df[["bench_stats.edp", "iteration", "model"]]
    .groupby(["model", "iteration"])
    .agg(str(viz.OptimizationType.MINIMIZE))
    .reset_index()
)
perf_df["task"] = exp_name
# The former `perf_df.to_dict()` call discarded its result and is removed.
# NOTE: each run of this cell appends again; reset `task_name_to_res` first to
# avoid duplicated tasks.
task_name_to_res.append(perf_df)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

DPI = 600  # default dpi for most printers

# Global plot styling for the combined figure.
plt.style.use("ggplot")
sns.set_theme(style="ticks", rc={"axes.spines.right": False, "axes.spines.top": False})
sns.set_context("paper")  # , font_scale=1.5, rc={"lines.linewidth": 1.5})
plt.rcParams["svg.fonttype"] = "none"
plt.rcParams["font.family"] = "Arial"

plt.rc("text", usetex=False)
for axis_ticks in ("xtick", "ytick"):
    plt.rc(axis_ticks, labelsize="small")
plt.rc("axes", labelsize="medium")
plt.rc("pdf", use14corefonts=True)


def _short_task_title(task_name):
    """Strip the known benchmark-family prefixes and title-case the remainder."""
    for family_prefix in ("gemm_", "stencil_", "aes_", "fft_"):
        task_name = task_name.replace(family_prefix, "")
    return task_name.title()


# One frame holding the results of every task collected so far.
all_tasks_res = pd.concat([pd.DataFrame(task_res) for task_res in task_name_to_res])

fig, ax = plt.subplots(figsize=(4, 3))
all_tasks_res["task"] = all_tasks_res["task"].apply(_short_task_title)

model_order = ["Default", "DeepGP", "BoTorch", "Random", "SMAC", "BoGraph"]
ax = sns.barplot(
    data=all_tasks_res,
    x="task",
    y="bench_stats.edp",
    hue="model",
    hue_order=model_order,
    palette=color_palette,
    ax=ax,
)
ax.set(ylabel="Log(EDP(x))", xlabel="Task", ylim=(20, 38))
legend_columns = len(color_palette) // 2
plt.legend(ncol=legend_columns, loc="upper center", fontsize="small")
# fig.savefig(f"{output_location}/all_perfs.{output_format}",
#             bbox_inches='tight',
#             format=f'{output_format}', dpi=600)



# Env time analysis

In [None]:
import glob
import re
from collections import defaultdict

from autorocks.viz.data.filenames_const import PER_STEP_EXECUTION_TIME_FN

# Plot showing execution time of each task separately

# Captures (task name, leading alphabetic part of the model directory) from a
# result path.
# NOTE(review): the pattern and the directory below use "gem5/", while the
# experiment directory loaded earlier uses "gem5osdi/" — confirm this is intended.
# task_name_extractor = re.compile(r"gem5/(.*)/\d+_params")
task_name_extractor = re.compile(r"gem5/(.*)/\d+_params/\d+_iter/([a-zA-Z]*)")
# model_name_extractor = re.compile(r"")
all_tasks = RootDir / "local_execution/gem5/"
# Recursively collect every per-step execution-time file below that directory.
execution_file_pattern = str(all_tasks / f"**/{PER_STEP_EXECUTION_TIME_FN}")
all_execution_files = glob.glob(execution_file_pattern, recursive=True)

In [None]:
# Maps (task name, model name) -> list of execution times read from disk.
execution_times = defaultdict(list)

for exe_file in all_execution_files:
    # Best effort: report unreadable or unparsable files and keep going.
    try:
        with open(exe_file, "r") as time_file:
            raw_time = time_file.read()
        t = float(raw_time)
        first_match = task_name_extractor.findall(exe_file)[0]
        task_name, model_name = first_match
        execution_times[(task_name, model_name)].append(t)
    except Exception as e:
        print(e)
        print(f"Error in {exe_file}")

In [None]:
import numpy as np

# One summary row per (task, model): the 99th percentile of its step times.
# np.percentile expects q on a 0-100 scale; the former q=0.99 selected the
# 0.99th percentile (close to the minimum) instead of p99.
res = [
    {"Task": task, "Model": model, "Time(sec)": np.percentile(times, 99)}
    for (task, model), times in execution_times.items()
]

In [None]:
import pandas as pd

# Tabulate the per-(task, model) timings and order tasks by their mean time.
exe_df = pd.DataFrame(res)
mean_time_per_task = exe_df.groupby("Task")["Time(sec)"].mean()
grp_order = mean_time_per_task.sort_values().index
exe_df

In [None]:
import seaborn as sns

# Quick look: execution time per task, split by model.
quick_barplot_kwargs = {"x": "Task", "y": "Time(sec)", "hue": "Model"}
sns.barplot(data=exe_df, **quick_barplot_kwargs)

In [None]:
import matplotlib.pyplot as plt

plt.rc("xtick", labelsize="small")
plt.rc("ytick", labelsize="medium")
plt.rc("axes", labelsize="medium")
fig, ax = plt.subplots(figsize=(4, 3))

# Shorten the task labels by stripping the benchmark-family prefixes. The old
# code discarded the result of `.apply(...)`, so the labels never changed.
# A derived frame is used so `exe_df` stays untouched and the cell can be
# re-run safely.
exe_plot_df = exe_df.assign(
    Task=exe_df["Task"].apply(
        lambda x: x.replace("gemm_", "")
        .replace("stencil_", "")
        .replace("aes_", "")
        .replace("fft_", "")
        .replace("spmv_", "")
        .replace("md_", "")
        .upper()
    )
)

# Group AFTER relabelling so the bar order matches the labels actually plotted.
exe_time = exe_plot_df.groupby("Task")["Time(sec)"].agg(["mean", "std"]).sort_values(by="mean", ascending=True)
grp_order = exe_time.index

ax = sns.barplot(data=exe_plot_df, y="Task", x="Time(sec)", order=grp_order, palette="colorblind", ax=ax)
# viz._add_num_to_plot(ax, "{}s")

# Write each task's mean time next to its bar (fixed x position of 10).
for y, x in enumerate(exe_time["mean"]):
    plt.annotate(f"{x:.2f}s", xy=(10, y), va="center")
# for y, x in enumerate(exe_time['std']):
#     plt.annotate(u" \u00B1 " + f"{x:.2f}", xy=(50, y), va='center')

#
if save_res:
    fig.savefig(
        f"{output_location}/env_exetime.{output_format}", bbox_inches="tight", format=f"{output_format}", dpi=600
    )