In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# Check dependencies exist before doing all the work: import the graph/causal
# libraries and render an empty structure model as a smoke test.
import networkx as nx
import causalnex
from causalnex.plots import plot_structure
from causalnex.structure import StructureModel

# Make sure to have graphviz installed and pygraphviz:
#   macOS:          brew install graphviz
#   Debian/Ubuntu:  sudo apt-get install graphviz graphviz-dev
#   Python side:    pip install graphviz
#                   pip install pygraphviz
# Plotting an empty StructureModel exercises the plotting stack before any data is loaded.
plot_structure(StructureModel())
# Record the causalnex version used for this run.
print(causalnex.__version__)

In [None]:
from autorocks.data.loader.all_models_result_aggregator import create_all_models_comparison_dataset
from autorocks.envs.gem5.schema import Gem5ParametersCollection20

# defaultdict is used by the metric-grouping cells further down.
from collections import defaultdict


from autorocks.dir_struct import LocalResultDir
from autorocks.envs.gem5.benchmarks.benchmark_tasks import MachSuiteTask

# Experiment under analysis: results are read from
# <LocalResultDir>/gem5/<task>/20_params/100_iter.
task_name = str(MachSuiteTask.AES)
exp_dir = LocalResultDir / f"gem5/{task_name}/20_params/100_iter"
model_comparison_data = create_all_models_comparison_dataset(exp_dir)

# Names of the tuned parameters, built directly as a set (no throw-away list).
param_space = Gem5ParametersCollection20()
param_names = {p.name for p in param_space.parameters()}
# Objective columns that later cells treat as the main targets.
main_targets = ["bench_stats.avg_power", "detailed_stats.system.sim_seconds"]

# # to latex
# import pandas as pd
#
# pd.DataFrame(param_space.to_latex(),
#              columns=['name', 'lower bound', 'upper bound']).to_csv('input_params.csv')

In [None]:
from notebooks.gem5.statistics_parser import all_models_parser

df = all_models_parser(exp_dir)

In [None]:
for col in df.performance.columns:
    print(col)

In [None]:
# Restrict every table to a single optimizer's runs and drop the now-constant
# "model" column (the objectives table is narrowed to the columns used later).
model_filter = "BoGraph"

all_system_perf = model_comparison_data.system_performance
system_pref = all_system_perf[all_system_perf.model == model_filter][
    ["bench_stats.avg_power", "detailed_stats.system.sim_seconds", "step", "iteration"]
]

all_params = model_comparison_data.parameters
params_df = all_params[all_params.model == model_filter].drop(columns=["model"])

# Missing statistics are replaced with 0.
all_extra_perf = df.performance
extra_perf = all_extra_perf[all_extra_perf.model == model_filter].drop(columns=["model"]).fillna(0)

# Preprocessing steps

In [None]:
from sklearn.feature_selection import VarianceThreshold

# Remove low variance features.
#
# "step" and "iteration" are join keys, not features: later cells select and
# merge on them, so they must survive this filter regardless of their variance.
# The selector is therefore fitted on the remaining columns only.
index_cols = [c for c in ("step", "iteration") if c in extra_perf.columns]
value_cols = [c for c in extra_perf.columns if c not in index_cols]

sel = VarianceThreshold(threshold=(0.8 * (1 - 0.8)))
sel.fit(extra_perf[value_cols])  # only the support mask is needed, not the transformed array

kept_cols = {c for c, keep in zip(value_cols, sel.get_support()) if keep}
kept_cols.update(index_cols)

# A boolean mask over the original columns preserves the original column order.
keep_mask = [c in kept_cols for c in extra_perf.columns]
extra_perf_no_low = extra_perf.loc[:, keep_mask]

In [None]:
## new grouping
# Bucket metrics by "<second dotted component>_<second-to-last dotted component>".
main_groups = defaultdict(list)
for column_name in extra_perf_no_low.columns:
    if "::" in column_name:
        # histogram, ignore
        continue
    parts = column_name.split(".")
    if len(parts) >= 2:
        main_groups[f"{parts[1]}_{parts[-2]}"].append(column_name)

for group_name, members in main_groups.items():
    print(f"{group_name} has: {len(members)} items")

In [None]:
# Option B
# Two-level reduction of the raw statistics:
#   1. metrics sharing the same last dotted component ("sub-group") are
#      standardized and collapsed to one PCA component;
#   2. all sub-groups belonging to the same second dotted component ("group")
#      are stacked and collapsed again to a single PCA component per group.
import re

import numpy as np
from sklearn.decomposition import FactorAnalysis, PCA
from sklearn.preprocessing import StandardScaler

# Raw strings: "\." is a regex escape, not a Python string escape.
sub_group_extractor = re.compile(r".*\.([^.]+)", re.RegexFlag.IGNORECASE)
group_extractor = re.compile(r"([^.]+)", re.RegexFlag.IGNORECASE)
main_groups = defaultdict(list)
sub_groups = defaultdict(list)

# All columns minus the non-static (index) ones; sorted so the grouping order
# does not depend on set iteration order.
idx_cols = {"iteration", "step"}
values_cols = sorted(set(extra_perf_no_low.columns) - idx_cols)

for metric in values_cols:
    groups = group_extractor.findall(metric)
    if len(groups) < 2:
        continue
    sub_name = sub_group_extractor.findall(metric)[0]

    # Register each sub-group once per group; appending it once per metric
    # would stack duplicate columns into the group-level PCA below.
    if sub_name not in main_groups[groups[1]]:
        main_groups[groups[1]].append(sub_name)
    # NOTE(review): sub-groups are keyed by the last component only, so metrics
    # from different groups that share a leaf name are pooled - confirm intended.
    sub_groups[sub_name].append(metric)

metric_pruned = extra_perf_no_low.copy()

for group, sub_group_names in main_groups.items():
    group_vals = []

    for sub_group_name in sub_group_names:
        sub_metrics = sub_groups[sub_group_name]
        # Ensure all metrics of the sub-group are within the same range.
        scaler = StandardScaler()
        scaled_vals = scaler.fit_transform(metric_pruned[sub_metrics].values)
        if len(sub_metrics) > 1:
            # reduce it to 1.
            transformer = PCA(n_components=1)
            decomposed_vals = transformer.fit_transform(scaled_vals)
            group_vals.append(decomposed_vals.squeeze())
        else:
            group_vals.append(scaled_vals.squeeze())
    # One column per sub-group, one row per sample.
    group_vals_ = np.vstack(group_vals).T
    group_transformer = PCA(n_components=1)
    group_pruned_val = group_transformer.fit_transform(group_vals_)
    metric_pruned[group] = group_pruned_val

# pandas needs a list (not a set) as a column indexer; dict.fromkeys removes
# duplicates while keeping a stable order.
new_cols = list(dict.fromkeys(list(main_groups.keys()) + ["iteration", "step"]))
metric_pruned = metric_pruned[new_cols]

In [None]:
import sklearn.preprocessing as pre

# Standardize parameters and objectives separately, then join everything on the
# (step, iteration) key and drop the key so only model inputs remain.

# normalize parameters
params_df_scaled = params_df.copy()
scaler = pre.StandardScaler()  # pre.MinMaxScaler()#StandardScaler()
# Column indexers must be lists: pandas 2.x no longer accepts sets.
all_cols_no_idx = [c for c in params_df_scaled.columns if c not in {"step", "iteration"}]
params_df_scaled_val = scaler.fit_transform(params_df_scaled[all_cols_no_idx].values)
# Replace the columns (rather than writing through .loc) so integer-typed
# columns can take the float-valued scaled data without a dtype conflict.
params_df_scaled[all_cols_no_idx] = params_df_scaled_val

# standardize res

system_pref_scaled = system_pref.copy()
scaler = pre.StandardScaler()  # StandardScaler()
all_cols_no_idx = [c for c in system_pref_scaled.columns if c not in {"step", "iteration"}]
system_pref_scaled_val = scaler.fit_transform(system_pref_scaled[all_cols_no_idx].values)
system_pref_scaled[all_cols_no_idx] = system_pref_scaled_val

# Merge data
param_targets = params_df_scaled.merge(system_pref_scaled, on=["step", "iteration"])
param_targets = param_targets.merge(metric_pruned, on=["step", "iteration"])
param_targets = param_targets.drop(
    columns=[
        "iteration",
        "step",
    ]
)

# Structure between main objectives

In [None]:
# Learn a structure model over parameters, reduced metrics and objectives.
main_targets = [
    "bench_stats.avg_power",
    "detailed_stats.system.sim_seconds",
]
from causalnex.structure.pytorch import from_pandas
import torch

# Only switch the default tensor type to CUDA when a GPU is actually present;
# on a CPU-only machine the unconditional call fails before any learning starts.
if torch.cuda.is_available():
    torch.set_default_tensor_type(torch.cuda.FloatTensor)
torch.set_default_dtype(torch.float32)
sm = from_pandas(
    param_targets,
    # w_threshold=0.8,
    # Objectives may not be parents of anything; parameters may not be children.
    tabu_parent_nodes=main_targets,
    tabu_child_nodes=param_names,
    # hidden_layer_units=[1],
    # ridge_beta=0.1
)
print("Done")

In [None]:
main_targets = ["bench_stats.avg_power", "detailed_stats.system.sim_seconds", "EDP"]

# Manual search

In [None]:
# Collect every node lying on a shortest path from the selected parameters to "EDP".
nodes_to_keep = set()
for source_param in ("cycle_time", "enable_l2", "cache_line_sz"):
    nodes_to_keep.update(nx.shortest_path(smaller_sm, source=source_param, target="EDP"))

In [None]:
smaller_sm

In [None]:
from autorocks.viz.causal_util import clean_node_name


# Cleaned-up display name for every node of the pruned structure.
node_names = [clean_node_name(raw_name) for raw_name in smaller_sm.nodes]

In [None]:
# Plain directed graph mirroring the pruned structure, with cleaned node names.
new_graph = nx.DiGraph()
new_graph.add_edges_from(
    (clean_node_name(src), clean_node_name(dst)) for src, dst in smaller_sm.edges()
)

In [None]:
nx.nx_agraph.to_agraph(new_graph).layout()

In [None]:
for node in new_graph.nodes:
    print(f'"{node}";')

In [None]:
import pygraphviz as pgv

# Build the output path here instead of relying on the scratch variable `f`,
# which other cells rebind to different files (including a .pdf path).
dot_path = f"{task_name}_structure.dot"
nx.nx_agraph.to_agraph(new_graph).draw(dot_path, format="dot", prog="nop")

In [None]:
from autorocks.viz.causal_util import plot_struct_customized
from IPython.display import Image

# Start from the learned structure and drop weak edges.
smaller_sm = sm.copy()
smaller_sm.remove_edges_below_threshold(0.35)
# smaller_sm.remove_edge("detailed_stats.system.sim_seconds", "cpu")
# Expert knowledge: both objectives feed EDP, and "cpu" feeds average power.
smaller_sm.add_edges_from(
    [
        ("detailed_stats.system.sim_seconds", "EDP", {"weight": 3, "expert": True}),
        ("bench_stats.avg_power", "EDP", {"weight": 3, "expert": True}),
        ("cpu", "bench_stats.avg_power", {"weight": 2}),
    ],
    origin="expert",
)

# smaller_sm.add_edges_from(smaller_in,
#                           origin="learned")
smaller_sm = smaller_sm.get_largest_subgraph()

# Keep only nodes on a shortest path from the selected parameters to EDP.
# This is computed here, on the graph just built, because the manual-search
# cell needs `smaller_sm` to exist first; reading its `nodes_to_keep` made
# this cell impossible to run on a fresh kernel.
nodes_to_keep = set()
for source_param in ("cycle_time", "enable_l2", "cache_line_sz"):
    nodes_to_keep.update(nx.shortest_path(smaller_sm, source=source_param, target="EDP"))

smaller_sm.remove_nodes_from(set(smaller_sm.nodes) - nodes_to_keep)
f = f"{task_name}_structure.dot"
viz = plot_struct_customized(
    smaller_sm, graph_name=f"Structure for {task_name}", param_nodes=param_names, sink_nodes=main_targets
)
# obj_subgraph = viz.subgraph(['bench_stats.avg_power', 'detailed_stats.system.sim_seconds'], name="objectives")
# obj_subgraph.graph_attr.update({"rank": "same"})
viz.draw(f, format="dot")
# Image(f)
Image(viz.draw(format="png"))

In [None]:
# "param_space" is not a node of the graph, so using it as the source raises
# NodeNotFound. Query each tuned parameter that is still present in the pruned
# graph instead: parameter -> predecessors of "EDP" on shortest paths from it.
{
    param: nx.predecessor(smaller_sm, source=param, target="EDP")
    for param in sorted(param_names)
    if param in smaller_sm
}

In [None]:
from pathlib import Path

from autorocks.viz.causal_util import plot_struct_customized
from IPython.display import Image
import matplotlib.pyplot as plt

# Keep text as text in SVG output and use a paper-friendly font.
plt.rcParams["svg.fonttype"] = "none"
plt.rcParams["font.family"] = "Arial"
# NOTE(review): machine-specific absolute path - adjust when running elsewhere.
out_dir = "/home/salabed/workspace/latex/papers/osdi21_bograph/figs"
# Create the target folder up front so the export does not fail on a missing directory.
Path(out_dir, "svg").mkdir(parents=True, exist_ok=True)

viz = plot_struct_customized(
    smaller_sm, graph_name=f"Structure for {task_name}", param_nodes=param_names, sink_nodes=main_targets
)

name = f"{task_name}_structure"
f = f"{out_dir}/svg/{name}.svg"
viz.draw(f, format="svg")
f = f"{out_dir}/{name}.pdf"
# viz.draw(f, format='pdf')

# Inline preview of the same graph.
Image(viz.draw(format="png"))

In [None]:
max(smaller_sm.degree, key=lambda x: x[1])[1]

In [None]:
# Parameters that appear in the sub-graph attached to the first main target.
target_subgraph = sm.get_target_subgraph(main_targets[0])
found_nodes = [candidate for candidate in target_subgraph.nodes() if candidate in param_names]

print(found_nodes)

In [None]:
# OLD STUFF

import re

# Group metrics by their second dotted component.
group_extractor = re.compile("([^.]+)", re.RegexFlag.IGNORECASE)
main_groups = defaultdict(list)
for metric in extra_perf_no_low.columns:
    groups = group_extractor.findall(metric)
    if not groups or len(groups) < 2:
        continue
    main_groups[groups[1]].append(metric)

for group, val in main_groups.items():
    print(f"{group} has: {len(val)} items")
### Option A
# Standardize every metric once, then collapse each group to one PCA component.
# import pandas as pd
from sklearn.decomposition import FactorAnalysis, PCA
from sklearn.preprocessing import StandardScaler
import numpy as np

metric_pruned = extra_perf_no_low.copy()
scaler = StandardScaler()
# Column indexers must be lists: pandas 2.x no longer accepts sets.
all_cols_no_idx = [c for c in metric_pruned.columns if c not in {"step", "iteration"}]
# Replace the columns so their dtype can become float without a .loc dtype conflict.
metric_pruned[all_cols_no_idx] = scaler.fit_transform(metric_pruned[all_cols_no_idx].values)


for group, sub_metrics in main_groups.items():
    # print(f"{group} has: {len(sub_metrics)} items")
    group_vals = []
    scaled_vals = metric_pruned[sub_metrics]
    if len(sub_metrics) > 1:
        # reduce it to 1.
        transformer = PCA(n_components=1)
        decomposed_vals = transformer.fit_transform(scaled_vals)
        group_vals.append(decomposed_vals.squeeze())
    else:
        group_vals.append(scaled_vals.squeeze())
    group_vals_ = np.vstack(group_vals).T
    # group_transformer = FactorAnalysis(n_components=1)
    # group_pruned_val = group_transformer.fit_transform(group_vals_)
    metric_pruned[group] = group_vals_
#
# De-duplicated list of the reduced columns plus the join keys.
new_cols = list(dict.fromkeys(list(main_groups.keys()) + ["iteration", "step"]))
#
metric_pruned = metric_pruned[new_cols]
metric_pruned

In [None]:
metric_pruned

In [None]:
main_groups.keys()

In [None]:
##### TOO OLD
# BACKUP Option b
# Single-level reduction: metrics sharing the same last dotted component are
# standardized and collapsed to one PCA component.
from sklearn.decomposition import FactorAnalysis, PCA
from sklearn.preprocessing import StandardScaler

import re

# Raw string: "\." is a regex escape, not a Python string escape.
sub_group_extractor = re.compile(r".*\.([^.]+)", re.RegexFlag.IGNORECASE)
sub_groups = defaultdict(list)

# All columns minus the non-static (index) ones; sorted so the grouping order
# does not depend on set iteration order.
idx_cols = {"iteration", "step"}
values_cols = sorted(set(extra_perf_no_low.columns) - idx_cols)

for metric in values_cols:
    sub_name = sub_group_extractor.findall(metric)
    if not sub_name:
        continue
    sub_groups[sub_name[0]].append(metric)

# import pandas as pd
metric_pruned = extra_perf_no_low.copy()

for group, sub_metrics in sub_groups.items():
    # print(f"{group} has: {len(sub_metrics)} items")
    # Ensure all metrics of the group are within the same range.
    scaler = StandardScaler()
    scaled_vals = scaler.fit_transform(metric_pruned[sub_metrics].values)
    if len(sub_metrics) > 1:
        # reduce it to 1.
        transformer = PCA(n_components=1)
        decomposed_vals = transformer.fit_transform(scaled_vals)
        metric_pruned[group] = decomposed_vals.squeeze()
    else:
        metric_pruned[group] = scaled_vals.squeeze()
# pandas needs a list (not a set) as a column indexer.
new_cols = list(dict.fromkeys(list(sub_groups.keys()) + ["iteration", "step"]))

metric_pruned = metric_pruned[new_cols]

In [None]:
metric_pruned

In [None]:
param_targets = params_df.merge(system_pref, on=["step", "iteration"])

In [None]:
import numpy as np

# Scalar objective: avg_power * log((1 / sim_seconds) ** 2), one value per row.
objectives = system_pref[["bench_stats.avg_power", "detailed_stats.system.sim_seconds"]].values
inverse_time = 1 / objectives[:, 1]
y = objectives[:, 0] * np.log(inverse_time * inverse_time)
y

In [None]:
y

In [None]:
# NOTE: neither SelectKBest nor chi2 is referenced in this cell itself.
from sklearn.feature_selection import SelectKBest, chi2

# Feature matrix: the reduced metrics without the (step, iteration) key columns.
X = metric_pruned.drop(columns=["step", "iteration"])

# Display the feature matrix.
X

In [None]:
from sklearn.feature_selection import SelectKBest, f_regression

# Keep the (at most) 50 features with the strongest univariate linear relation
# to y. k is capped at the number of available features so the selection still
# works when the reduced metric table has fewer than 50 columns.
selector = SelectKBest(f_regression, k=min(50, X.shape[1]))
X_new = selector.fit_transform(X, y)

# Boolean mask over X's columns marking the selected features.
X_support = selector.get_support()

In [None]:
# Apply the selector's mask to the matrix it was fitted on (X). Re-deriving the
# matrix from `metric_pruned` breaks on a second run, because `metric_pruned`
# is overwritten below and no longer matches the mask length.
small_metric = X
# .copy() gives an independent frame so adding the key columns is not a write into a slice.
metric_pruned = small_metric.loc[:, selector.get_support()].copy()
# Re-attach the join keys (aligned on the row index).
metric_pruned["step"] = extra_perf_no_low["step"].copy()
metric_pruned["iteration"] = extra_perf_no_low["iteration"].copy()

In [None]:
metric_pruned

In [None]:
import numpy as np

# Energy-delay-product style column: log(avg_power * cycle ** 2).
# NOTE(review): system_pref is narrowed earlier in this notebook to avg_power,
# sim_seconds, step and iteration, so "bench_stats.cycle" may be missing from
# param_targets and this lookup would then raise KeyError - TODO confirm which
# delay column is intended.
param_targets["edp"] = np.log(
    param_targets["bench_stats.avg_power"].values * (param_targets["bench_stats.cycle"].values ** 2)
)

# Show the new column next to the cycle_time parameter.
param_targets[["edp", "cycle_time"]]

In [None]:
import seaborn as sns


sns.lineplot(data=param_targets[["edp", "cycle_time"]], x="cycle_time", y="edp")

In [None]:
param_targets[["edp", "cycle_time"]]

In [None]:
param_targets[["edp", "cycle_time"]]

In [None]:
param_targets[["edp", "cycle_time"]]

In [None]:
param_targets[["edp", "cycle_time"]]

In [None]:
param_targets[["edp", "cycle_time"]]