In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# All imports for this cell live at the top so a fresh kernel fails fast on a missing dependency.
from collections import defaultdict

import networkx as nx
import numpy as np

from autorocks.envs.gem5.schema import Gem5ParametersCollection20
from autorocks.data.loader.all_models_result_aggregator import create_all_models_comparison_dataset
from autorocks.dir_struct import RootDir
from autorocks.envs.gem5.benchmarks.benchmark_tasks import MachSuiteTask
from notebooks.gem5.statistics_parser import all_models_parser


def select_episode(frame, iteration=0):
    """Return the rows of ``frame`` that belong to a single iteration.

    Args:
        frame: DataFrame that has ``step`` and ``iteration`` columns.
        iteration: value of the ``iteration`` column to keep (default ``0``).

    Returns:
        A new DataFrame holding only the matching rows, with the ``step`` and
        ``iteration`` bookkeeping columns dropped and the index renumbered from 0.
    """
    single_run = frame[frame["iteration"] == iteration]
    return single_run.drop(columns=["step", "iteration"]).reset_index(drop=True)


# --- Locate and load the experiment results -------------------------------------------
task_name = str(MachSuiteTask.SPMV_ELLPACK)
exp_dir = RootDir.parent / f"local_execution/gem5osdi/{task_name}/20_params/100_iter"
model_comparison_data = create_all_models_comparison_dataset(exp_dir)

param_space = Gem5ParametersCollection20()
param_names = {p.name for p in param_space.parameters()}
main_targets = ["bench_stats.avg_power", "detailed_stats.system.sim_seconds"]

df = all_models_parser(exp_dir)

# --- Combine all models data and flatten the structure --------------------------------
system_pref = model_comparison_data.system_performance
params_df = model_comparison_data.parameters
extra_perf = df.performance

# Keep only the rows produced by one model. Each mask is built from the frame it filters,
# so the selection never depends on another object's index happening to line up.
model_filter = "BoGraph"
system_pref = system_pref[system_pref.model == model_filter]
params_df = params_df[params_df.model == model_filter]
extra_perf = extra_perf[extra_perf.model == model_filter]

# Objectives keep only the two main targets plus the bookkeeping columns needed below.
system_pref = system_pref[[*main_targets, "step", "iteration"]]
params_df = params_df.drop(columns=["model"])
extra_perf = extra_perf.drop(columns=["model"]).fillna(0)

# Get only one instance to replicate one real episode
extra_perf = select_episode(extra_perf)
system_pref = select_episode(system_pref)
params_df = select_episode(params_df)

# Derived objective: log(avg_power * (1 / sim_seconds) ** 2).
# NOTE(review): an energy-delay product is usually power * delay ** 2; here the delay is
# inverted before squaring -- confirm this is the intended objective.
system_pref["edp"] = np.log(
    system_pref["bench_stats.avg_power"] * (1 / system_pref["detailed_stats.system.sim_seconds"]) ** 2
)

In [None]:
from autorocks.optimizer.bograph.preprocessor.standardizer import MetricsStandardizerProcessor
from autorocks.optimizer.bograph.preprocessor.normalizer import ParamNormalizerProcessor
from autorocks.optimizer.bograph.preprocessor.variance_threshold import VarianceThresholdPreprocessor
from autorocks.optimizer.bograph.dag_preprocessor import PreprocessingPipeline
from autorocks.optimizer.bograph.preprocessor.grouper import GrouperProcessor
from autorocks.optimizer.bograph.bograph_dao import BoGraphDataPandas


# Bundle the three frames (parameters, objectives, intermediate metrics) into one container.
data = BoGraphDataPandas(objs=system_pref, intermediate=extra_perf, params=params_df)

# Build each preprocessing step separately so the pipeline definition reads as a simple list.
variance_filter = VarianceThresholdPreprocessor()
# NOTE(review): the meaning of the positional `True` passed to `bounds` is defined by the
# parameter-space class and is not visible here -- confirm before changing.
param_normalizer = ParamNormalizerProcessor(param_space.bounds(True).T)
metric_standardizer = MetricsStandardizerProcessor()

dp = PreprocessingPipeline(preprocessors=[variance_filter, param_normalizer, metric_standardizer])
processed_data = dp.fit_transform(data)

# Show the parameter frame as it looks after the pipeline.
processed_data.params

In [None]:
from autorocks.optimizer.bograph.structure_learn.notears.notears import NoTears

# Column names handed to the learner as structural constraints.
# Presumably objectives are barred from being parents and parameters from being
# children -- confirm against the NoTears wrapper.
objective_columns = data.objs.columns.tolist()
parameter_columns = data.params.columns.tolist()

sl = NoTears(tabu_children=parameter_columns, tabu_parents=objective_columns)

# NOTE(review): the structure is learned from `data`, not from `processed_data` -- confirm intended.
struct = sl.learn_structure(data)
struct

# Visualization of the learned structure


In [None]:
from autorocks.viz.causal_util import plot_struct_customized
from IPython.display import Image

# Render the full learned structure; parameter and objective names are passed so the
# plotting helper can treat those nodes specially.
graph_title = f"Structure for {task_name}"
viz = plot_struct_customized(struct, graph_name=graph_title, sink_nodes=main_targets, param_nodes=param_names)

# Draw to PNG and hand the result to IPython for inline display.
rendered_png = viz.draw(format="png")
Image(rendered_png)

In [None]:
# Prune weak edges on a copy, so `struct` itself is not the object being modified here.
G = struct.copy()
G.remove_edges_below_threshold(0.1)

sources = data.params.columns.to_list()
sinks = data.objs.columns.to_list()

# Collect into `new_G` every edge that lies on a simple path (cutoff=5) from a parameter
# to an objective in the pruned graph. Pairs with no such path contribute no edges.
new_G = nx.DiGraph()
for source in sources:
    for sink in sinks:
        paths_to_sink = list(nx.all_simple_edge_paths(G, source, sink, cutoff=5))
        new_G.add_edges_from(edge for path in paths_to_sink for edge in path)

# Expert knowledge: both main targets feed the derived "edp" objective.
expert_attrs = {"weight": 3, "expert": True}
expert_causes = ("detailed_stats.system.sim_seconds", "bench_stats.avg_power")
new_G.add_edges_from(
    [(cause, "edp", dict(expert_attrs)) for cause in expert_causes],
    origin="expert",
)

In [None]:
viz = plot_struct_customized(new_G, graph_name=f"Structure for {task_name}", param_nodes=sources, sink_nodes=sinks)
Image(viz.draw(format="png"))

In [None]:
# `all_paths` was previously never assigned anywhere in the notebook, so this cell raised
# NameError on a fresh kernel. Define it here as the flat list of (u, v, attrs) edges kept
# in `new_G`, which is the form `add_edges_from` accepts.
# NOTE(review): this rebinds `G` (previously the thresholded copy of `struct`) -- confirm
# that later cells expect the reduced graph.
all_paths = list(new_G.edges(data=True))
G = nx.DiGraph()
G.add_edges_from(all_paths)

# Sanity check: number of edges carried over into the rebuilt graph.
G.number_of_edges()

In [228]:
# Display the objectives frame held by `data` (columns: avg_power, sim_seconds, edp).
# NOTE(review): the recorded output contains negative avg_power / sim_seconds values, which
# suggests `data` was standardized in place by the preprocessing pipeline -- confirm.
data.objs

Unnamed: 0,bench_stats.avg_power,detailed_stats.system.sim_seconds,edp
0,1.107005,-0.521065,1.378906
1,0.518094,-0.089536,1.002481
2,0.837647,-1.359627,1.722672
3,0.718654,-1.783779,2.016580
4,-0.150020,-0.926891,0.777367
...,...,...,...
95,-0.383782,-0.162748,-0.664837
96,-0.337542,-0.163259,-0.131071
97,-0.343980,1.509272,-0.622725
98,-0.355262,-0.175322,-0.264749
