In [1]:
import os
import pandas as pd
import numpy as np
from ax_experiments_functions import rc_setup, rc_read_glch_data, rc_label_to_params, rc_params_to_label
from ax_utils import build_optimization_config_mohpo, get_hv_from_df
from ax.core.search_space import SearchSpace

In [2]:
def get_glch_max_complexity_history(glch_data,complexity_axis):
    """Return the running maximum of one column of ``glch_data``.

    Element ``i`` of the result is the maximum of ``glch_data[complexity_axis]``
    over rows ``0..i`` (positional order), i.e. the largest value seen after
    ``i + 1`` rows.

    Parameters
    ----------
    glch_data : pandas.DataFrame
        Frame whose rows are read in positional order.
    complexity_axis : str
        Name of the column to accumulate.

    Returns
    -------
    list
        One value per row of ``glch_data``; an empty list for an empty frame.
    """
    running_max = glch_data[complexity_axis].cummax()

    # cummax() leaves NaN at positions whose own value is NaN, whereas taking
    # max() over a prefix skips NaNs. Forward-filling restores the prefix-max
    # semantics (leading NaNs stay NaN, as max() of an all-NaN prefix would).
    return running_max.ffill().tolist()

In [3]:
# Root folder holding every input of this notebook. It is defined once so the
# notebook can be pointed at another checkout by setting the
# PERCEPTRONAC_TRADEOFFS_DIR environment variable; the default is the original
# hard-coded location.
tradeoffs_dir = os.environ.get(
    "PERCEPTRONAC_TRADEOFFS_DIR",
    "/home/lucas/Documents/perceptronac/scripts/tradeoffs",
)

# Folder scanned for Ax result CSVs; only files whose name contains `prefix`
# are used. The trailing separator of the original value is kept.
ax_results_folder = os.path.join(tradeoffs_dir, "ax_results_energy_params_bits", "")
prefix = "micro_joules_per_pixel_data_bits_over_data_samples_ax_methods_seed"

# CSV handed to the setup function.
data_csv_path = os.path.join(
    tradeoffs_dir, "rate-noisy-joules-time-params_hx-10-20-40-80-160-320-640.csv")

# One GLCH history CSV per variant, keyed by a short label.
glch_results_dir = os.path.join(tradeoffs_dir, "glch_results")
glch_csv_paths = {
    label: os.path.join(glch_results_dir, f"glch2D_{variant}_rate_vs_energy_noisy_history.csv")
    for label, variant in [
        ("c_angle_rule", "angle_rule_constrained"),
        ("u_angle_rule", "angle_rule_unconstrained"),
        ("c_gift_wrapping", "gift_wrapping_constrained"),
        ("u_gift_wrapping", "gift_wrapping_unconstrained"),
        ("c_tie_break", "tie_break_constrained"),
    ]
}

In [4]:
# The problem-specific `rc_*` helpers are bound to generic names here, so the
# cells below only refer to the generic names.
setup_func, read_glch_data_func = rc_setup, rc_read_glch_data
label_to_params_func, params_to_label_func = rc_label_to_params, rc_params_to_label

# setup_func is unpacked into the search-space parameters, the metrics and the
# data frame that the rest of the notebook filters and merges against.
(parameters, metrics, data) = setup_func(data_csv_path)

search_space = SearchSpace(parameters=parameters)



In [5]:
# Thresholds on the first metric, one per iteration: for each GLCH variant take
# the running maximum of metrics[0] over its history, then keep, at every
# iteration, the smallest running maximum across variants.

all_glch_data = {
    lbl: read_glch_data_func(csv_path)
    for lbl, csv_path in glch_csv_paths.items()
}

glch_max_c_lists = {
    lbl: get_glch_max_complexity_history(history, metrics[0].name)
    for lbl, history in all_glch_data.items()
}

# Length of the longest history.
n_iters = max(len(max_c_list) for max_c_list in glch_max_c_lists.values())

# Pad shorter histories with None so that all columns have n_iters rows; the
# row-wise min() below ignores the padding.
glch_max_c_lists = {
    lbl: max_c_list + [None] * (n_iters - len(max_c_list))
    for lbl, max_c_list in glch_max_c_lists.items()
}

per_iteration_min = pd.DataFrame(glch_max_c_lists).min(axis=1)
thresholds = per_iteration_min.values.tolist()

In [11]:



def _load_ax_runs(results_folder, file_prefix, methods, reference_data, params_to_label):
    """Read every matching Ax result CSV once and prepare one frame per (method, file).

    A file matches when its name ends with ".csv" and contains ``file_prefix``.
    For each method, the columns whose name contains "<method>_param_" are
    extracted, the prefix is stripped, a label is computed for every row with
    ``params_to_label`` and the rows are merged with ``reference_data`` on that
    label. The original row position is kept in an "iteration" column.

    Returns a dict ``{method: {file_name: merged_frame}}``.
    """
    label_col = reference_data.index.name
    runs = {method: {} for method in methods}

    # Sorted so the result does not depend on the arbitrary order of os.listdir.
    for fname in sorted(os.listdir(results_folder)):
        if not (fname.endswith(".csv") and (file_prefix in fname)):
            continue

        ax_df = pd.read_csv(os.path.join(results_folder, fname))

        for method in methods:
            col_prefix = f"{method}_param_"
            method_df = ax_df[[c for c in ax_df.columns if col_prefix in c]].copy()
            method_df.columns = [c.replace(col_prefix, "") for c in method_df.columns]
            method_df.loc[:, label_col] = method_df.apply(
                lambda row: params_to_label(*row.values), axis=1)
            runs[method][fname] = method_df.reset_index(names="iteration").merge(
                reference_data, left_on=label_col, right_on=label_col)

    return runs


def _hv_or_zero(optimization_config, points):
    """Return get_hv_from_df for ``points``, or 0 when ``points`` has no rows.

    Relies on the notebook-level ``search_space`` and ``label_to_params_func``.
    """
    if points.shape[0] == 0:
        return 0
    return get_hv_from_df(search_space, optimization_config, points, label_to_params_func)


ax_methods = ["sobol", "ehvi", "parego"]
complexity_col = metrics[0].name
metric_names = [metric.name for metric in metrics]
label_col = data.index.name

# The CSV files do not depend on the threshold, so they are read and merged
# once here instead of once per (threshold, method) pair.
ax_runs = _load_ax_runs(ax_results_folder, prefix, ax_methods, data, params_to_label_func)
if not all(ax_runs[method] for method in ax_methods):
    # Without this check the averaging below fails with a bare ZeroDivisionError.
    raise FileNotFoundError(
        f"no .csv file containing {prefix!r} found in {ax_results_folder}")

# Column order: GLCH variants, then "max_hv", then the Ax methods.
all_hvs = {name: [] for name in [*all_glch_data.keys(), "max_hv", *ax_methods]}

for i,th in enumerate(thresholds,1):

    # Only points whose first metric does not exceed the current threshold count.
    filt_data = data[data[complexity_col] <= th]

    # Reference point: 1.1 times the per-metric maximum of the admissible points.
    ref_point = filt_data[metric_names].max().values * 1.1
    optimization_config = build_optimization_config_mohpo(metrics,ref_point)

    # Value obtained when every admissible point of `data` is available.
    all_hvs["max_hv"].append( get_hv_from_df(search_space,optimization_config,filt_data,label_to_params_func) )

    for lbl,glch_data in all_glch_data.items():

        # First i rows of the GLCH history, restricted to admissible points.
        filt_glch_data = glch_data.iloc[:i,:]
        filt_glch_data = filt_glch_data[filt_glch_data[complexity_col] <= th]

        # Same zero-row guard as for the Ax runs below.
        all_hvs[lbl].append( _hv_or_zero(optimization_config,filt_glch_data) )

    for method in ax_methods:
        method_hvs = []
        for f,run_df in ax_runs[method].items():

            # Select by the recorded iteration rather than by position, so the
            # selection stays "the first i iterations" even if the merge dropped
            # or duplicated rows.
            filt_tmp_df = run_df[run_df["iteration"] < i]

            filt_tmp_df = filt_tmp_df[filt_tmp_df[complexity_col] <= th]

            # A label may be proposed more than once; count it only once.
            filt_tmp_df = filt_tmp_df.drop_duplicates(subset=[label_col])

            if len(set(filt_tmp_df.index)) != len(filt_tmp_df.index):
                raise ValueError(f"{f} {len(set(filt_tmp_df.index))} {len(filt_tmp_df.index)}")

            method_hvs.append( _hv_or_zero(optimization_config,filt_tmp_df.set_index(label_col)) )

        # Average over the result files of this method.
        all_hvs[method].append( sum(method_hvs)/len(method_hvs) )




In [7]:
# One column per entry of all_hvs, one row per threshold (rows are 0-based).
all_hvs_df = pd.DataFrame.from_dict(all_hvs)

In [8]:
# ( - all_hvs_df.drop("max_hv",axis=1).sub(all_hvs_df["max_hv"],axis=0)).map(lambda x : np.log10(x) if x!=0 else None ).plot(
#     xlabel="number of observations", ylabel="Log Hypervolume Difference")

In [10]:
# Row k of all_hvs_df was computed from the first k+1 observations (the loop
# that fills it counts from 1), so the rows are relabelled 1..n for plotting;
# with the default 0-based index the curves would sit one observation too far
# to the left. all_hvs_df itself is left unchanged.
hvs_by_n_observations = all_hvs_df.set_axis(
    pd.RangeIndex(1, len(all_hvs_df) + 1), axis=0)

hvs_by_n_observations.plot(
    xlabel="number of observations", ylabel="Hypervolume")