In [98]:
import matplotlib.pyplot as plt
%matplotlib inline
%load_ext autoreload
%autoreload 2


import os

# Export SYSTEM_MEMORY for this process; it is set ahead of the autorocks imports below.
# NOTE(review): presumably autorocks reads this to size memory-dependent settings — confirm.
os.environ["SYSTEM_MEMORY"] = "16gb"

In [110]:
from autorocks.envs.postgres.schema import PostgresParametersCollection10
from autorocks.envs.postgres.benchmarks.benchbase import BenchmarkClass
from autorocks.data.loader.all_models_result_aggregator import create_all_models_comparison_dataset
from autorocks.dir_struct import ProcessedDataDir

# Experiment under analysis: objective, benchmark, and parameter space.
obj_name = "latency_p99"
bench_name = str(BenchmarkClass.YCSB)
param_space = PostgresParametersCollection10()
# Unique names of the parameters in the search space.
param_names = {param.name for param in param_space.parameters()}

# Name of the objective column used throughout the analysis cells.
main_target = "bench_res.latency_p99"

# Processed results are located by objective / benchmark / number of parameters.
exp_dir = ProcessedDataDir / f"postgres/{obj_name}/{bench_name}/{len(param_space)}_params/100_iter"
model_comparison_data = create_all_models_comparison_dataset(exp_dir)


In [111]:
import pandas as pd

# Render the parameter-space description as a LaTeX table; print() is used so the
# markup can be copied verbatim from the cell output.
param_table = pd.DataFrame(param_space.to_latex())
print(param_table.to_latex())

In [112]:
import autorocks.viz.viz as viz

# Hex colours handed to the palette helper, in the order given here.
palette_hex_codes = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]

# Normalise model names first so the palette is built from the unified names.
model_comparison_data_c = viz.unify_model_name(model_comparison_data)
color_palette = viz.create_color_palette(model_comparison_data_c, palette_hex_codes)

In [113]:
# Work on a copy so the aggregated observations are not modified.
perf_df = model_comparison_data_c.sys_observations.copy()

# TODO: temporary bridge for the changed logging format. Rows that carry a value in
# the "latency_p99" column have it copied into the main target column.
index_of_res_in_new_format = perf_df["latency_p99"].notna()
perf_df.loc[index_of_res_in_new_format, main_target] = perf_df.loc[
    index_of_res_in_new_format, "latency_p99"
]

In [114]:
# Flag failed evaluations: an objective value above 1e9 is treated as an error.
perf_df["errors"] = perf_df[main_target].gt(1e9)

In [115]:
import seaborn as sns
import matplotlib.pyplot as plt

# Number of flagged rows per (model, iteration) run: "errors" is a boolean column,
# so summing it counts the rows marked as failed.
errors_df = perf_df[["errors", "model", "iteration"]].groupby(["model", "iteration"]).sum().reset_index()

# Draw on the axes created here rather than on pyplot's implicit "current axes",
# so the plot cannot land on a stale figure from another cell.
f, ax = plt.subplots(figsize=(9, 4))
sns.boxplot(
    data=errors_df,
    y="errors",
    x="model",
    palette=color_palette,
    order=viz.cal_plot_order(errors_df, "errors", ascending=True),
    ax=ax,
)

In [116]:
# Blank out failed runs: every column of a row whose objective exceeds 1e9 is set
# to None. The rows stay in the frame; they are not removed.
failed_runs = perf_df[main_target] > 1e9
perf_df.loc[failed_runs] = None

In [117]:
# Lowest objective value observed for each model.
best_per_model = perf_df[["model", main_target]].groupby("model").min()
best_per_model

In [118]:

# Box plot of the objective column per model; the objective is minimised.
# (A horizontal reference line for "Default" was previously tried and is disabled.)
viz.perf_boxplot(
    perf_df=perf_df,
    comparison_col=main_target,
    optimization_type=viz.OptimizationType.MINIMIZE,
    ylabel="P99 Latency (ms)",
    model_palette_map=color_palette,
    fig_size=(9, 5),
)

In [119]:
# Last non-null value of each column within every (model, iteration) group,
# reduced to the model name and the objective.
last_per_run = perf_df.groupby(["model", "iteration"]).last().reset_index()
last_per_run[["model", main_target]]

In [121]:
# Drop OtterTune for being bad
# perf_df.loc[perf_df["model"] == "OtterTune"] = None

# ROI box plot of the objective, using the "Default" model as the baseline.
fig = viz.roi_boxplot(
    df=perf_df,
    optimization_type=viz.OptimizationType.MINIMIZE,
    ylabel="P99 Latency (ms)",
    comparison_col=main_target,
    model_palette_map=color_palette,
    model_baseline="Default",
)

# The export directory defaults to the original absolute path; set FIG_OUTPUT_DIR
# to redirect the figure when running on another machine.
output_location = os.environ.get(
    "FIG_OUTPUT_DIR",
    "/home/salabed/workspace/latex/papers/mlsys23_bograph/figs/svg",
)
output_format = "svg"

# savefig does not create missing directories, so make sure the target exists.
os.makedirs(output_location, exist_ok=True)

fig.savefig(
    os.path.join(output_location, f"postgres_box.{output_format}"),
    bbox_inches="tight",
    format=output_format,
    dpi=600,
)
fig

In [34]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# TODO: Plot the convergence plot

# Start from a copy so the frame used by the box plots is left untouched.
convergence_df = perf_df.copy()

# Blank out rows whose objective exceeds the 1e9 error threshold.
failed_rows = convergence_df[main_target] > 1e9
convergence_df.loc[failed_rows] = None

# Convergence is not meaningful for static configurations, so blank their rows too.
for static_model in ("OtterTune", "Default"):
    convergence_df.loc[convergence_df["model"] == static_model] = None


In [35]:
# Impute the missing results for the objective column.
# NOTE(review): max_steps=100 presumably matches the 100-iteration experiments — confirm.
convergence_df = viz.imput_all_missing_vals(
    df=convergence_df,
    target=main_target,
    max_steps=100,
)

In [41]:
# Keep only observations with step > 15.
# NOTE(review): presumably this skips the initial warm-up steps — confirm the cutoff.
# The explicit copy makes the result an independent frame, so adding columns to it
# afterwards does not trigger pandas' SettingWithCopyWarning.
convergence_df = convergence_df[convergence_df["step"] > 15].copy()

In [42]:
# Name of the cumulative aggregation, built from the optimisation direction.
# NOTE(review): resolves to "cummin" only if str(OptimizationType.MINIMIZE) == "min" — confirm.
cumulative_agg = f"cum{str(viz.OptimizationType.MINIMIZE)}"

# Apply it to the objective within each (model, iteration) group and store the
# result in the "rolling" column.
per_run = convergence_df[[main_target, "model", "iteration"]].groupby(["model", "iteration"])
convergence_df["rolling"] = per_run.agg({main_target: cumulative_agg})

In [44]:
# Convergence line plot drawn from the "rolling" column; the objective is minimised.
# (model_comparison_data_c.system_performance was the earlier data source.)
fig = viz.convergence_lineplot(
    df=convergence_df,
    column_name="rolling",
    optimization_type=viz.OptimizationType.MINIMIZE,
    ylabel="P99 Latency (ms)",
    model_palette_map=color_palette,
    fig_size=(4, 3),
)
fig

In [285]:
# Echo the objective column name currently in use (interactive sanity check).
main_target

In [16]:
from autorocks.viz import causal_util
from causalnex.structure import StructureModel
import networkx as nx
from autorocks.viz.causal_util import plot_struct_customized
from IPython.display import Image

# Build the structure model from the edge list stored on disk, read as a directed graph.
sm = StructureModel(nx.read_edgelist("postgres.edgelist.gcastle", create_using=nx.DiGraph))

# Keep the plot under its own name: calling it `viz` would shadow the
# `autorocks.viz.viz` module alias that the plotting cells rely on.
causal_graph = plot_struct_customized(
    sm,
    "Test",
    param_nodes=param_names,
    # NOTE(review): param_nodes receives a set while sink_nodes receives a bare string —
    # confirm plot_struct_customized accepts a single node name here.
    sink_nodes=main_target,
)

# Render the graph to PNG and display it inline.
Image(causal_graph.draw(format="png"))

In [5]:
# Show the edges of the structure model loaded from the edge list.
sm.edges