In [None]:
import numpy as np
from os import makedirs

# Evolutionary experiment path (display name -> results folder)
evomol_exp_dict = {
    "Evolutionary optimization only": "output/01_EA_baseline"
}

# BBO experiments paths (display name -> results folder)
BBO_exp_dict = {
    "BBO(Shingles, ·)": "output/03_BBO_SHINGLES_DP",
    "BBO(MBTR, RBF)": "output/02_BBO_MBTR_RBF"
}

# Names of the different runs (runs that are not defined will be ignored).
# Single definition on purpose: only run "0" is selected. To aggregate runs
# "0" through "9", use [str(i) for i in range(10)] instead.
sub_experiments_names = ["0"]

# Output destination of figures (created if it does not exist yet)
output_figures_path = "output/04_figures"
makedirs(output_figures_path, exist_ok=True)

# Numerical targets for ECDF representation and ERT measure
# NOTE(review): np.arange with a non-integer step can include or drop the last
# value because of floating point rounding; np.linspace gives an exact count.
ECDF_TARGETS = np.arange(-10, -1, 0.01)
ERT_TARGETS = np.arange(-10, -1, 1)


## Extracting results

In [None]:
from bbomol.postprocessing import extract_multiple_BBO_experiments_data, extract_multiple_evomol_experiments_data

def extract_all_data(BBO_experiments_dict, EvoMol_experiments_dict, sub_experiments_names):
    """
    Load the data of every BBO and EvoMol experiment into a single dictionary.

    :param BBO_experiments_dict: dict mapping an experiment name to the path of a BBO experiment
    :param EvoMol_experiments_dict: dict mapping an experiment name to the path of an EvoMol experiment
    :param sub_experiments_names: names of the runs, forwarded unchanged to both extraction functions
    :return: dict mapping each experiment name to the value returned by its extraction function.
    BBO experiments come first; an EvoMol experiment sharing the name of a BBO experiment replaces it.
    """

    # All BBO experiments are extracted before any EvoMol experiment
    bbo_results = {
        label: extract_multiple_BBO_experiments_data(folder, sub_experiments_names)
        for label, folder in BBO_experiments_dict.items()
    }

    evomol_results = {
        label: extract_multiple_evomol_experiments_data(folder, sub_experiments_names)
        for label, folder in EvoMol_experiments_dict.items()
    }

    return {**bbo_results, **evomol_results}

In [None]:
# Extracting all results
# (one entry per experiment name, BBO and EvoMol experiments merged in a single dict)
results_dict = extract_all_data(BBO_exp_dict, evomol_exp_dict, sub_experiments_names)

## Empirical cumulative distribution functions (ECDF)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="ticks", color_codes=True)
from bbomol.evaluation import compute_ecdf, compute_timestamps_ecdf
from os.path import join

def plot_ECDF(results_dict, timestamps=False):
    """
    Plot one ECDF curve per experiment, save the figure in output_figures_path and show it.

    The ECDF is computed against ECDF_TARGETS. The figure is saved as "ECDF.png" when plotting against the
    number of DFT calls and as "ECDF_timestamps.png" when plotting against time, so that calling the function
    in both modes does not overwrite the first figure with the second one.

    :param results_dict: dict mapping each experiment name to its extracted data. The keys
    "dataset_success_n_calls" and "dataset_success_obj_value" are read when timestamps is False, the keys
    "timestamps" and "best_scores_timestamps" are read when timestamps is True.
    :param timestamps: if False (default), the x axis is the number of DFT calls. If True, the x axis is the time
    in hours.
    :return: None
    """

    fig, ax = plt.subplots(figsize=(7, 5))

    for experiment_name, experiment_data in results_dict.items():

        if timestamps:
            elapsed_time, ecdf_vect = compute_timestamps_ecdf(
                timestamps_list=experiment_data["timestamps"],
                obj_values_list=experiment_data["best_scores_timestamps"],
                targets=ECDF_TARGETS
            )

            # Scaling to match the "Time (h)" axis (assumes timestamps are in seconds — TODO confirm)
            ax.plot(elapsed_time / 3600, ecdf_vect, label=experiment_name)

        else:
            obj_calls, ecdf_vect = compute_ecdf(
                obj_calls_list=experiment_data["dataset_success_n_calls"],
                obj_values_list=experiment_data["dataset_success_obj_value"],
                targets=ECDF_TARGETS
            )

            ax.plot(obj_calls, ecdf_vect, label=experiment_name)

    if timestamps:
        ax.set_xlim(0, 60)
        ax.set_xlabel("Time (h)")
        figure_filename = "ECDF_timestamps.png"
    else:
        ax.set_xlim(1, 1000)
        ax.set_xlabel("# DFT calls")
        figure_filename = "ECDF.png"

    ax.legend(loc="lower right")
    ax.set_ylim(0, 1)
    ax.set_ylabel("Proportion of targets achieved")

    fig.savefig(join(output_figures_path, figure_filename), dpi=600)
    plt.show()
    
def plot_ECDF_timestamps(results_dict):
    """
    Plot the ECDF of every experiment against time.

    Thin wrapper kept for backward compatibility: it delegates to plot_ECDF(results_dict, timestamps=True).
    The previous body plotted variables that were never computed and therefore raised a NameError.

    :param results_dict: dict mapping each experiment name to its extracted data (see plot_ECDF)
    :return: None
    """
    plot_ECDF(results_dict, timestamps=True)

In [None]:
# ECDF plotted against the number of DFT calls
plot_ECDF(results_dict)

In [None]:
# ECDF plotted against time (in hours)
plot_ECDF(results_dict, timestamps=True)

## Expected running time (ERT)

In [None]:
from bbomol.evaluation import compute_ERT, compute_ERT_timestamps
from IPython.display import display, HTML
import pandas as pd


def display_ERT(results_dict, timestamps=False):
    """
    Display a table of the ERT values of every experiment, with one column per value of ERT_TARGETS.

    :param results_dict: dict mapping each experiment name to its extracted data. The keys
    "dataset_success_n_calls" and "dataset_success_obj_value" are read when timestamps is False, the keys
    "timestamps", "best_scores_timestamps" and "effective_last_timestamp" are read when timestamps is True.
    :param timestamps: if False (default), the ERT is computed with compute_ERT and shown under the
    "Number of DFT calls" heading. If True, it is computed with compute_ERT_timestamps, divided by 3600 and
    shown under the "Time (h)" heading.
    :return: None
    """

    # First column holds the experiment name, the following ones hold one ERT target each
    column_names = ["Experiment", *map(str, ERT_TARGETS)]
    table = {column_name: [] for column_name in column_names}

    heading = "<h3> Time (h) </h3>" if timestamps else "<h3> Number of DFT calls </h3>"
    display(HTML(heading))

    for experiment_name, experiment_data in results_dict.items():

        if not timestamps:
            ert_values = compute_ERT(
                obj_calls_list=experiment_data["dataset_success_n_calls"],
                obj_values_list=experiment_data["dataset_success_obj_value"],
                targets=ERT_TARGETS
            )
        else:
            ert_values = compute_ERT_timestamps(
                timestamps_list=experiment_data["timestamps"],
                obj_values_list=experiment_data["best_scores_timestamps"],
                targets=ERT_TARGETS,
                effective_last_timestamp_list=experiment_data["effective_last_timestamp"]
            ) / 3600

        table["Experiment"].append(experiment_name)

        # Target columns start at index 1, right after the "Experiment" column
        for column_index, ert_value in enumerate(ert_values, start=1):
            table[column_names[column_index]].append(ert_value)

    display(pd.DataFrame.from_dict(table))
    

In [None]:
# ERT table expressed in number of DFT calls
display_ERT(results_dict)

In [None]:
# ERT table expressed in time (hours)
display_ERT(results_dict, timestamps=True)