In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension; mode 2 re-imports edited modules before each
# cell executes, so changes to local packages are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
# Fail fast: import the graph / causal-discovery stack up front so a missing
# dependency surfaces here instead of after the expensive steps further down.
import networkx as nx
import causalnex
from causalnex.plots import plot_structure
from causalnex.structure import StructureModel

# Plotting needs graphviz and pygraphviz to be installed:
#   brew install graphviz
#   sudo apt-get install graphviz graphviz-dev
#   pip install graphviz
#   pip install pygraphviz
# Plotting an empty structure model exercises that tool-chain.
_empty_structure = StructureModel()
plot_structure(_empty_structure)
print(causalnex.__version__)

In [18]:


import os
from dataclasses import fields

# Exported ahead of the autorocks imports, matching the original statement order.
# NOTE(review): presumably read by autorocks when the parameter schema is built -- confirm.
os.environ["SYSTEM_MEMORY"] = "16gb"

from autorocks.envs.postgres.schema import PostgresParametersCollection10
from autorocks.data.loader.all_models_result_aggregator import create_all_models_comparison_dataset
from autorocks.dir_struct import PackageRootDir
from autorocks.envs.postgres.benchmarks.benchbase import BenchbaseResult

# Which processed experiment to load.
task_name = "ycsb"
obj_name = "latency_p99"
exp_dir = PackageRootDir / f"ProcessedData/postgres/{obj_name}/{task_name}/10_params/100_iter"
model_comparison_data = create_all_models_comparison_dataset(exp_dir)

# Names of the tunable parameters and of the objective column(s).
param_space = PostgresParametersCollection10()
param_names = {param.name for param in param_space.parameters()}
main_targets = ["bench_res.latency_p99"]

# One "bench_res.<field>" name per dataclass field of BenchbaseResult.
benchmark_results = {f"bench_res.{field.name}" for field in fields(BenchbaseResult)}

# # to latex
# import pandas as pd
#
# pd.DataFrame(param_space.to_latex(),
#              columns=['name', 'lower bound', 'upper bound']).to_csv('input_params.csv')

In [19]:
# Display the `sys_observations` table of the loaded comparison dataset.
model_comparison_data.sys_observations

In [23]:
# Combine all models' data and flatten the structure into three frames:
#   system_pref -- the objective column(s) listed in `main_targets`
#   params_df   -- the parameter values
#   extra_perf  -- every observation column that is not a benchmark-result field
observations = model_comparison_data.sys_observations

system_pref = observations[main_targets].copy()
params_df = model_comparison_data.sys_params

# Select with an ordered list rather than a set: pandas rejects set indexers
# (deprecated in 1.5, TypeError from 2.0), and a list keeps the column order
# identical from run to run.
extra_perf_columns = [col for col in observations.columns if col not in benchmark_results]
extra_perf = observations[extra_perf_columns].copy()

# Drop the bookkeeping columns and reset to a plain positional index so the
# three frames line up row by row.
system_pref = system_pref.reset_index(drop=True)
params_df = params_df.drop(columns=["model", "iteration", "step"]).reset_index(drop=True)
# Missing intermediate metrics are filled with 0.
extra_perf = extra_perf.drop(columns=["model", "iteration", "step"]).fillna(0).reset_index(drop=True)

In [27]:
# Display the non-benchmark observation columns selected into `extra_perf`.
extra_perf

# Preprocessing steps

In [28]:
from autorocks.optimizer.bograph.preprocessor.standardizer import MetricsStandardizerProcessor
from autorocks.optimizer.bograph.preprocessor.normalizer import ParamNormalizerProcessor
from autorocks.optimizer.bograph.preprocessor.variance_threshold import VarianceThresholdPreprocessor
from autorocks.optimizer.bograph.dag_preprocessor import PreprocessingPipeline
from autorocks.optimizer.bograph.bograph_dao import BoGraphDataPandas

# Hand copies of the three frames to the container so the source frames are
# not shared with the preprocessing stages.
data = BoGraphDataPandas(
    params=params_df.copy(),
    objs=system_pref.copy(),
    intermediate=extra_perf.copy(),
)

# Stages in the order they are given to the pipeline.
preprocessing_steps = [
    VarianceThresholdPreprocessor({}),
    ParamNormalizerProcessor(param_space.bounds(True).T),
    MetricsStandardizerProcessor(),
]
dp = PreprocessingPipeline(preprocessors=preprocessing_steps)
processed_data = dp.fit_transform(data)

processed_data

# Structure between main objectives

In [30]:
# Assign before displaying so this works on a fresh kernel (Restart & Run All);
# showing `processed_data_df` ahead of its assignment raises NameError.
processed_data_df = processed_data.to_combi_pandas()
processed_data_df

In [64]:
from causalnex.structure.pytorch import from_pandas
import torch

# Only make CUDA tensors the default when a GPU is actually present; requesting
# torch.cuda.FloatTensor unconditionally fails on CPU-only machines.
if torch.cuda.is_available():
    torch.set_default_tensor_type(torch.cuda.FloatTensor)
torch.set_default_dtype(torch.float32)

# Restrict the tabu lists to columns that are still in the frame and pass them
# as ordered lists (the documented argument type).
# NOTE(review): the variance-threshold stage may have dropped some parameter
# columns, in which case the unfiltered names would not be found -- confirm.
tabu_parents = [col for col in processed_data_df.columns if col in main_targets]
tabu_children = [col for col in processed_data_df.columns if col in param_names]

# Learn the structure: objectives may not be parents of any node, parameters
# may not be children of any node; edges weaker than 0.1 are discarded.
sm = from_pandas(
    processed_data_df,
    w_threshold=0.1,
    tabu_parent_nodes=tabu_parents,
    tabu_child_nodes=tabu_children,
)
print("Done")




# Using gcastle

In [87]:
import pandas as pd
import torch
from castle.algorithms import GOLEM

# Use the GPU when one is available and fall back to CPU otherwise, instead of
# hard-coding a device that may not exist on this machine.
golem_device = "gpu" if torch.cuda.is_available() else "cpu"
model = GOLEM(device_type=golem_device, num_iter=10000)

combi_pandas = processed_data.to_combi_pandas()
model.learn(combi_pandas.values, columns=combi_pandas.columns)

# Label the learned adjacency matrix with the column names on both axes.
adjacency_df = pd.DataFrame(
    model.causal_matrix,
    index=model.causal_matrix.columns,
    columns=model.causal_matrix.columns,
)
# A causal graph is directed: build a DiGraph so that row -> column entries
# keep their direction. The networkx default is an undirected Graph, which
# would collapse i -> j and j -> i into the same edge.
G = nx.from_pandas_adjacency(adjacency_df, create_using=nx.DiGraph)

print("Done")

In [99]:
from IPython.display import Image

from autorocks.viz import causal_util
from autorocks.viz.causal_util import plot_struct_customized

# Post-process the learned graph with the parameters as sources and the
# objectives as sinks, then build the customised plot from it.
G = causal_util.post_process_graph(G, sources=param_names, sinks=main_targets)
viz = plot_struct_customized(G, "Test", param_nodes=param_names, sink_nodes=main_targets)

# Render to PNG and show the image as the cell output.
png_bytes = viz.draw(format="png")
Image(png_bytes)



# CausalNex

In [84]:
from IPython.display import Image

from autorocks.viz import causal_util
from autorocks.viz.causal_util import plot_struct_customized

# Work on a copy so the learned model `sm` stays untouched: prune edges below
# 0.1 and keep only the largest connected subgraph.
smaller_sm = sm.copy()
smaller_sm.remove_edges_below_threshold(0.1)
smaller_sm = smaller_sm.get_largest_subgraph()

# Keep the CausalNex result under its own name. `G` holds the gCastle graph that
# is later written to "postgres.edgelist.gcastle"; reusing `G` here would make a
# top-to-bottom run save the CausalNex graph under the gCastle file name.
causalnex_graph = causal_util.post_process_graph(smaller_sm, sources=param_names, sinks=main_targets)
viz = plot_struct_customized(
    causalnex_graph,
    graph_name="Structure for Postgres",
    param_nodes=param_names,
    sink_nodes=main_targets,
)
Image(viz.draw(format="png"))


In [100]:
# Persist the current graph `G` to disk in NetworkX edge-list format.
edgelist_path = "postgres.edgelist.gcastle"
nx.write_edgelist(G, edgelist_path)