# Goal
The goal of this notebook is to compare the performance of an optimizer that uses multi-objective probability of improvement,
backed by a multi-objective homogeneous random forest, against random search.

# Methodology
We only need to allow each strategy to execute for a fixed number of iterations. We can subsequently process the resulting observations to estimate the volume of the Pareto frontier over time.

Specifically, let's first run both strategies on the `multi_objective_2_mutually_exclusive_polynomials` objective function, 10 times each, and compare their average performance.

In [1]:
# Inject a CSS rule that sets the notebook's `.container` element to 100% width.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
from mlos.OptimizerEvaluationTools.ObjectiveFunctionConfigStore import objective_function_config_store
from mlos.OptimizerEvaluationTools.ObjectiveFunctionFactory import ObjectiveFunctionFactory
from mlos.OptimizerEvaluationTools.SyntheticFunctions.Hypersphere import Hypersphere

from mlos.Optimizers.BayesianOptimizerFactory import BayesianOptimizerFactory, bayesian_optimizer_config_store
from mlos.Optimizers.OptimizationProblem import OptimizationProblem, Objective
from mlos.Optimizers.ParetoFrontier import ParetoFrontier

from mlos.Spaces import Point

# Look up the named objective function configuration in the config store ...
objective_function_config = objective_function_config_store.get_config_by_name(
    "multi_objective_2_mutually_exclusive_polynomials"
)
# ... and build the objective function that both search strategies are run against.
objective_function = ObjectiveFunctionFactory.create_objective_function(
    objective_function_config
)

In [3]:
# Draw 10 random parameter configurations from the objective function's parameter space.
random_params_df = objective_function.parameter_space.random_dataframe(10)
# Bare last expression: the dataframe is rendered as the cell output.
random_params_df

Unnamed: 0,polynomial_id,domain_1.x_0,domain_1.x_1,domain_0.x_0,domain_0.x_1
0,1,-997.417803,490.11385,,
1,1,668.515787,-79.931971,,
2,1,881.202802,-966.482091,,
3,0,,,394.492201,-119.61495
4,0,,,270.519358,-497.251643
5,1,-818.063896,-629.021691,,
6,1,-433.853602,-895.929924,,
7,1,-185.702147,381.386867,,
8,1,329.636606,-871.975085,,
9,0,,,-476.934168,-929.759039


In [4]:
# Evaluate the objective function on the random configurations in random_params_df;
# the result is rendered as the cell output.
objective_function.evaluate_dataframe(random_params_df)

Unnamed: 0,y0,y1
0,1181166.0,-5477513.0
1,-695802.2,-3247003.0
2,2652801.0,-3601524.0
3,-1239998.0,-54615.59
4,-1102360.0,-744938.2
5,-8248551.0,-10158920.0
6,-7288583.0,-6813047.0
7,44344.6,-284178.1
8,-375832.9,-1472037.0
9,-5739424.0,-10266660.0


In [5]:
# The optimization problem comes straight from the objective function itself.
optimization_problem = objective_function.default_optimization_problem

# Configuration for the guided optimizer, looked up by name in the config store.
optimizer_config = bayesian_optimizer_config_store.get_config_by_name(
    "default_multi_objective_optimizer_config"
)

In [None]:
# Run the guided optimizer `num_runs` times for `num_iterations_per_run` iterations
# each, keeping every run's optimizer and its observed objective values so the
# Pareto volume over time can be estimated afterwards.
objectives_dfs_by_run = []
optimizers_by_run = []
num_iterations_per_run = 1000
num_runs = 10

for run_id in range(num_runs):

    # A fresh optimizer is created for every run.
    optimizer = BayesianOptimizerFactory().create_local_optimizer(
        optimization_problem=optimization_problem,
        optimizer_config=optimizer_config
    )

    optimizers_by_run.append(optimizer)

    for i in range(num_iterations_per_run):
        # Report both counters 1-based so the progress label runs from
        # [1/num_runs][1/num_iterations_per_run] up to
        # [num_runs/num_runs][num_iterations_per_run/num_iterations_per_run].
        print(f"[{run_id + 1}/{num_runs}][{i + 1}/{num_iterations_per_run}]")
        suggestion = optimizer.suggest()
        values = objective_function.evaluate_point(suggestion)
        optimizer.register(suggestion.to_dataframe(), values.to_dataframe())

    # get_all_observations() is unpacked as a 3-tuple; only the middle element
    # (the objectives dataframe) is kept for this run.
    _, objectives_df, _ = optimizer.get_all_observations()
    objectives_dfs_by_run.append(objectives_df)
              

01/23/2021 07:19:03 -   BayesianOptimizerFactory -    INFO - [HomogeneousRandomForestRegressionModel.py: 106 -        _create_estimators() ] Creating 10 estimators. Tree config: {
  "criterion": "mse",
  "splitter": "best",
  "max_depth": 0,
  "min_samples_split": 2,
  "min_samples_leaf": 3,
  "min_weight_fraction_leaf": 0,
  "max_features": "auto",
  "max_leaf_nodes": 0,
  "min_impurity_decrease": 0,
  "ccp_alpha": 0,
  "min_samples_to_fit": 10,
  "n_new_samples_before_refit": 10
}. Request id: 0.30259741115742655
01/23/2021 07:19:03 -   BayesianOptimizerFactory -    INFO - [HomogeneousRandomForestRegressionModel.py: 120 -        _create_estimators() ] Creating DecissionTreeRegressionModel with the input_space:   Name: estimator_0_input_space
  Dimensions:
    domain___polynomial_id: {0, 1}
    contains_context: {False}
    domain___domain_1___x_1: [-1024.00, 1024.00]
    domain___domain_1___x_0: [-1024.00, 1024.00]
01/23/2021 07:19:03 -   BayesianOptimizerFactory -    INFO - [Homogen

01/23/2021 07:19:03 -   BayesianOptimizerFactory -    INFO - [HomogeneousRandomForestRegressionModel.py: 120 -        _create_estimators() ] Creating DecissionTreeRegressionModel with the input_space:   Name: estimator_8_input_space
  Dimensions:
    domain___polynomial_id: {0, 1}
    domain___domain_0___x_1: [-1024.00, 1024.00]
    domain___domain_0___x_0: [-1024.00, 1024.00]
    contains_context: {False}
01/23/2021 07:19:03 -   BayesianOptimizerFactory -    INFO - [HomogeneousRandomForestRegressionModel.py: 120 -        _create_estimators() ] Creating DecissionTreeRegressionModel with the input_space:   Name: estimator_9_input_space
  Dimensions:
    domain___domain_0___x_1: [-1024.00, 1024.00]
    contains_context: {False}
    domain___polynomial_id: {0, 1}
    domain___domain_0___x_0: [-1024.00, 1024.00]


[0/10][1/1000]
[0/10][2/1000]
[0/10][3/1000]
[0/10][4/1000]
[0/10][5/1000]
[0/10][6/1000]
[0/10][7/1000]
[0/10][8/1000]
[0/10][9/1000]
[0/10][10/1000]
[0/10][11/1000]
[0/10][12/1000]
[0/10][13/1000]
[0/10][14/1000]
[0/10][15/1000]
[0/10][16/1000]
[0/10][17/1000]
[0/10][18/1000]
[0/10][19/1000]
[0/10][20/1000]
[0/10][21/1000]
[0/10][22/1000]
[0/10][23/1000]
[0/10][24/1000]
[0/10][25/1000]
[0/10][26/1000]
[0/10][27/1000]
[0/10][28/1000]
[0/10][29/1000]
[0/10][30/1000]
[0/10][31/1000]
[0/10][32/1000]
[0/10][33/1000]
[0/10][34/1000]
[0/10][35/1000]
[0/10][36/1000]
[0/10][37/1000]
[0/10][38/1000]
[0/10][39/1000]
[0/10][40/1000]
[0/10][41/1000]
[0/10][42/1000]
[0/10][43/1000]
[0/10][44/1000]
[0/10][45/1000]
[0/10][46/1000]
[0/10][47/1000]
[0/10][48/1000]
[0/10][49/1000]


In [None]:
# Estimate the Pareto volume of every guided run after its first 10, 20, ...,
# num_iterations_per_run observations.
# The checkpoints are derived from num_iterations_per_run rather than hard-coded
# so they stay in sync with the number of observations collected per run.
pareto_volume_checkpoints = range(10, num_iterations_per_run + 1, 10)

# Indexed by checkpoint: element i collects one Pareto volume estimate per run.
pareto_volumes_over_time_per_run = [[] for _ in pareto_volume_checkpoints]
for objectives_df in objectives_dfs_by_run:
    for i, j in enumerate(pareto_volume_checkpoints):
        # Only the first j observations of the run are considered at this checkpoint.
        pareto_frontier = ParetoFrontier(optimization_problem=optimization_problem, objectives_df=objectives_df[:j])
        pareto_volume_estimator = pareto_frontier.approximate_pareto_volume(num_samples=1000000)
        lower_bound, upper_bound = pareto_volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(alpha=0.05)
        # Use the midpoint of the two-sided confidence interval as the point estimate.
        pareto_volume = (lower_bound + upper_bound) / 2
        pareto_volumes_over_time_per_run[i].append(pareto_volume)


In [None]:
# Average the per-run Pareto volume estimates at each checkpoint.
average_pareto_volume_by_iteration = []
for volumes_at_checkpoint in pareto_volumes_over_time_per_run:
    average_pareto_volume_by_iteration.append(sum(volumes_at_checkpoint) / len(volumes_at_checkpoint))

In [None]:
# Render the guided optimizer's average Pareto volume at each checkpoint as the cell output.
average_pareto_volume_by_iteration

In [None]:
# Random search baseline: for each run, draw num_iterations_per_run random
# configurations, evaluate them, and estimate the Pareto volume over time in the
# same way as for the guided runs.
#
# num_runs and num_iterations_per_run are reused from the guided optimization cell
# (rather than re-declared here) so both strategies are always compared over the
# same number of runs and iterations.
objectives_dfs_by_random_run = []

for run_id in range(num_runs):
    suggestions_df = objective_function.parameter_space.random_dataframe(num_iterations_per_run)
    objectives_df = objective_function.evaluate_dataframe(suggestions_df)
    objectives_dfs_by_random_run.append(objectives_df)

# Checkpoints at which the Pareto volume is estimated: after the first 10, 20, ...,
# num_iterations_per_run observations of a run.
random_pareto_volume_checkpoints = range(10, num_iterations_per_run + 1, 10)

# Indexed by checkpoint: element i collects one Pareto volume estimate per run.
pareto_volumes_over_time_per_random_run = [[] for _ in random_pareto_volume_checkpoints]
for objectives_df in objectives_dfs_by_random_run:
    for i, j in enumerate(random_pareto_volume_checkpoints):
        # Only the first j observations of the run are considered at this checkpoint.
        pareto_frontier = ParetoFrontier(optimization_problem=optimization_problem, objectives_df=objectives_df[:j])
        pareto_volume_estimator = pareto_frontier.approximate_pareto_volume(num_samples=1000000)
        lower_bound, upper_bound = pareto_volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(alpha=0.05)
        # Use the midpoint of the two-sided confidence interval as the point estimate.
        pareto_volume = (lower_bound + upper_bound) / 2
        pareto_volumes_over_time_per_random_run[i].append(pareto_volume)

# Average the per-run estimates at each checkpoint.
average_random_pareto_volume_by_iteration = [sum(pareto_volumes) / len(pareto_volumes) for pareto_volumes in pareto_volumes_over_time_per_random_run]

In [None]:
# Render random search's average Pareto volume at each checkpoint as the cell output.
average_random_pareto_volume_by_iteration

In [None]:
import plotly.graph_objs as go

# Plot the average Pareto volume of both strategies against the number of
# observations. The Pareto volume was estimated after every 10 observations, so
# the k-th entry of each series corresponds to iteration 10 * (k + 1). Deriving
# the x-axis from the series length keeps it aligned with the plotted values.
iterations = [10 * (k + 1) for k in range(len(average_pareto_volume_by_iteration))]

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=iterations,
    y=average_pareto_volume_by_iteration,
    name='Guided Optimization: Average Pareto Volume vs. Iteration'
))

fig.add_trace(go.Scatter(
    x=iterations,
    y=average_random_pareto_volume_by_iteration,
    name='Random Search: Average Pareto Volume vs. Iteration'
))

# Title and axis labels so the figure can be read on its own.
fig.update_layout(
    title="Average Pareto Volume vs. Iteration: Guided Optimization vs. Random Search",
    xaxis_title="Iteration",
    yaxis_title="Average Pareto Volume",
)

fig.show()