In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension; mode 2 reloads imported modules before each
# cell runs, so edits to project code are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2


In [2]:
# Check dependencies exist before doing all the work
import networkx as nx
import causalnex
from causalnex.plots import plot_structure
from causalnex.structure import StructureModel

# Plotting needs the graphviz system package and its Python bindings:
#   brew install graphviz
#   sudo apt-get install graphviz graphviz-dev
#   pip install graphviz
#   pip install pygraphviz
# Plotting an empty structure exercises that toolchain before any real work.
empty_structure = StructureModel()
plot_structure(empty_structure)
print(causalnex.__version__)

In [3]:
from sysgym.envs.gem5.schema import AladdinSweeper20Params
from sysgym.envs.gem5.benchmarks.benchmark_tasks import MachSuiteTask
from autorocks.data.loader.all_models_result_aggregator import create_all_models_comparison_dataset
from autorocks.dir_struct import LocalResultDir

# Which experiment to analyse: benchmark task and objective name select the
# result directory below.
obj_name = "latency_power"
task_name = str(MachSuiteTask.AES)
relative_result_path = f"gem5/{obj_name}/{task_name}/20_params/100_iter"
exp_dir = LocalResultDir / relative_result_path

# Aggregate the results of all models; the cache is bypassed on purpose
# (force_recompute=True).
model_comparison_data = create_all_models_comparison_dataset(
    exp_dir, force_recompute=True
)

# Names of the tunable parameters and of the two objective columns.
param_space = AladdinSweeper20Params()
param_names = {param.name for param in param_space.parameters()}
main_targets = [
    "bench_stats.avg_power",
    "detailed_stats.system.sim_seconds",
]

# # to latex
# import pandas as pd
#
# pd.DataFrame(param_space.to_latex(),
#              columns=['name', 'lower bound', 'upper bound']).to_csv('input_params.csv')

In [4]:
from notebooks.gem5.statistics_parser import all_models_parser

# Parse the experiment directory with the project's gem5 statistics parser.
# Later cells read the result's `.performance` attribute.
df = all_models_parser(exp_dir)

In [6]:
# List every column of the parsed performance statistics, one name per line.
performance_columns = df.performance.columns
for column_name in performance_columns:
    print(column_name)

In [18]:
# Show the parsed performance statistics as the cell output.
df.performance

In [5]:
# Combine the data of all models into three flat frames (objectives,
# parameters, additional statistics), each with a fresh 0..n-1 index.
objective_columns = ["bench_stats.avg_power", "detailed_stats.system.sim_seconds"]
bookkeeping_columns = ["model", "iteration", "step"]

system_pref = model_comparison_data.sys_observations
params_df = model_comparison_data.sys_params
extra_perf = df.performance

# Optional: uncomment to restrict all three frames to a single model.
# model_filter = "BoGraph"
# system_pref = system_pref[system_pref.model == model_filter]
# params_df = params_df[model_comparison_data.sys_params.model == model_filter]
# extra_perf = extra_perf[df.performance.model == model_filter]

# Keep only the objective columns; drop the bookkeeping columns elsewhere.
system_pref = system_pref[objective_columns].reset_index(drop=True)
params_df = params_df.drop(columns=bookkeeping_columns).reset_index(drop=True)
# Missing statistics are replaced by 0 before further processing.
extra_perf = (
    extra_perf.drop(columns=bookkeeping_columns)
    .fillna(0)
    .reset_index(drop=True)
)

# Preprocessing steps

In [6]:
from autorocks.optimizer.bograph.preprocessor.standardizer import MetricsStandardizerProcessor
from autorocks.optimizer.bograph.preprocessor.normalizer import ParamNormalizerProcessor
from autorocks.optimizer.bograph.preprocessor.variance_threshold import VarianceThresholdPreprocessor
from autorocks.optimizer.bograph.dag_preprocessor import PreprocessingPipeline
from autorocks.optimizer.bograph.preprocessor.grouper import GrouperProcessor
from autorocks.optimizer.bograph.bograph_dao import BoGraphDataPandas


# Bundle parameters, objectives and intermediate metrics into one container.
# Copies are passed so the container does not share the source frames.
data = BoGraphDataPandas(
    params=params_df.copy(),
    objs=system_pref.copy(),
    intermediate=extra_perf.copy(),
)

# Preprocessing steps, in the order given to the pipeline.
# NOTE(review): judging by the class names these are a variance filter,
# parameter normalisation against the parameter-space bounds, and metric
# standardisation - confirm against the preprocessor implementations.
preprocessing_steps = [
    VarianceThresholdPreprocessor({}),
    ParamNormalizerProcessor(param_space.bounds(True).T),
    MetricsStandardizerProcessor(),
]
dp = PreprocessingPipeline(preprocessors=preprocessing_steps)
processed_data = dp.fit_transform(data)

processed_data

# Structure between main objectives

In [7]:
# Combined pandas view of the processed data; passed to causalnex's
# `from_pandas` in the structure-learning cell below.
processed_data_df = processed_data.to_combi_pandas()

In [83]:
import torch
from causalnex.structure.pytorch import from_pandas

# Objective columns. They are passed as tabu parents below, i.e. they may not
# have outgoing edges in the learned structure.
main_targets = [
    "bench_stats.avg_power",
    "detailed_stats.system.sim_seconds",
]

# Default to CUDA tensors only when a GPU is actually present. Requesting
# torch.cuda.FloatTensor unconditionally breaks this cell on CPU-only machines.
if torch.cuda.is_available():
    torch.set_default_tensor_type(torch.cuda.FloatTensor)
torch.set_default_dtype(torch.float32)

# Learn a structure over all processed columns. Parameters are tabu children
# (no incoming edges); objectives are tabu parents (no outgoing edges).
sm = from_pandas(
    processed_data_df,
    w_threshold=0.3,
    tabu_parent_nodes=main_targets,
    tabu_child_nodes=param_names,
    hidden_layer_units=[32, 32],
    ridge_beta=0.1,
)
print("Done")

In [8]:
import torch
from castle.algorithms import GOLEM

# Pick the GOLEM backend at runtime: asking for the GPU on a machine without
# CUDA makes learning fail, so fall back to the CPU there.
device_type = "gpu" if torch.cuda.is_available() else "cpu"
model = GOLEM(device_type=device_type)  # , num_iter=10000)

# Learn the causal matrix from the combined processed data; column names are
# passed along so the resulting matrix is labelled.
combi_pandas = processed_data.to_combi_pandas()
model.learn(combi_pandas.values, columns=combi_pandas.columns)

In [12]:
import pandas as pd
# Label the learned causal matrix with its column names on both axes.
causal_matrix = model.causal_matrix
adjacency_df = pd.DataFrame(
    causal_matrix,
    index=causal_matrix.columns,
    columns=causal_matrix.columns,
)

# A causal structure is directed: entry [i, j] is an edge i -> j. Without
# create_using=nx.DiGraph networkx builds an undirected graph and the edge
# direction learned by the model is lost.
G = nx.from_pandas_adjacency(adjacency_df, create_using=nx.DiGraph)


In [13]:
# Inspect the edges of the graph built from the learned causal matrix.
G.edges

In [14]:
from IPython.display import Image

from autorocks.viz import causal_util
from autorocks.viz.causal_util import plot_struct_customized

# Post-process the learned graph with parameters as sources and objectives as
# sinks. NOTE: G is overwritten in place, so re-running this cell feeds the
# already post-processed graph back into post_process_graph.
G = causal_util.post_process_graph(G, sources=param_names, sinks=main_targets)

# Build the customised drawing of the post-processed graph.
viz = plot_struct_customized(
    G,
    "Test",
    param_nodes=param_names,
    sink_nodes=main_targets,
)

# Render to PNG and show it as the cell output.
png_bytes = viz.draw(format="png")
Image(png_bytes)



In [17]:
# Inspect which nodes are present in the graph.
G.nodes

In [16]:
# Persist the graph's edges to an edge-list file in the current working directory.
nx.write_edgelist(G, "gem5_aes_mobo.edgelist")

In [81]:
sources = param_names
sinks = main_targets

# Build a directed graph containing only the edges that lie on some simple
# path (at most 5 edges long) from a parameter to an objective.
params_to_obj_graph = nx.DiGraph()
source_sink_pairs = (
    (param_node, objective_node)
    for param_node in sources
    for objective_node in sinks
)
for param_node, objective_node in source_sink_pairs:
    for edge_path in nx.all_simple_edge_paths(
        G, param_node, objective_node, cutoff=5
    ):
        # Each path is a sequence of edges; log it and merge it into the result.
        print(edge_path)
        params_to_obj_graph.add_edges_from(edge_path)


In [76]:
from autorocks.viz.causal_util import plot_struct_customized
# Wrap the parameter-to-objective graph in a causalnex StructureModel and
# build its customised drawing.
sm = StructureModel(params_to_obj_graph)
viz = plot_struct_customized(
    sm,
    "Test",
    param_nodes=param_names,
    sink_nodes=main_targets,
)

In [78]:
from IPython.display import Image

# Render the current drawing to PNG and show it as the cell output.
Image(viz.draw(format="png"))