# Preferential Bayesian Optimization: EI
This notebook demonstrates the use of the Expected Improvement (EI) acquisition function on ordinal (preference) data.

Formulation by Nguyen Quoc Phong.

In [1]:
import numpy as np
import gpflow
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
import os
import pickle

from gpflow.utilities import set_trainable, print_summary
gpflow.config.set_default_summary_fmt("notebook")

sys.path.append(os.path.split(os.path.split(os.path.split(os.getcwd())[0])[0])[0]) # Move 3 levels up directory to import PBO
import PBO

In [2]:
# Benchmark objective and the bounds applied to every input dimension
objective = PBO.objectives.hartmann3d
objective_low = 0
objective_high = 1.

# Labels combined into the experiment name (also used for the results directory)
objective_name = "Hart3"
acquisition_name = "EI"
experiment_name = "_".join(["PBO", acquisition_name, objective_name])

In [3]:
# Number of independent optimization runs (outer loop of the experiment)
num_runs = 20
# Number of acquisition/evaluation steps carried out within each run
num_evals = 20
# Number of uniformly sampled candidate points scored by EI at each evaluation
num_samples = 1000
# Number of points per query; the EI loop below only fills a pair (incumbent, candidate)
num_choices = 2
# Dimensionality of the objective's input space
input_dims = 3
# NOTE(review): not referenced by any cell visible in this notebook
num_maximizers = 20
# Random initial points per run; every unordered pair of them forms one initial query
num_init_points = 3
# Inducing points for the initial model; one more is used after each evaluation
num_inducing_init = 3
num_discrete_per_dim = 100 # Grid resolution per dimension when discretizing the continuous input space

In [4]:
# Per-experiment output folder under the current working directory
results_dir = "{}/results/{}/".format(os.getcwd(), experiment_name)

try:
    os.makedirs(results_dir)
except FileExistsError:
    # Re-running the notebook: reuse the folder from a previous run
    print("Directory " , results_dir ,  " already exists")
else:
    print("Directory " , results_dir ,  " created ")

Directory  /home/sebtsh/PBO/notebooks/EI/results/PBO_EI_Hart3/  already exists


In [5]:
def get_noisy_observation(X, objective):
    """
    Generate an observation for the queries in X under the given objective.

    X is passed through PBO.objectives.objective_get_f_neg (presumably the negated
    objective values -- confirm against PBO), and the result is handed, together
    with X and the constant 1, to PBO.observation_model.gen_observation_from_f.

    :param X: queries to evaluate
    :param objective: objective function forwarded to objective_get_f_neg
    :return: whatever gen_observation_from_f returns for these queries
    """
    return PBO.observation_model.gen_observation_from_f(
        X,
        PBO.objectives.objective_get_f_neg(X, objective),
        1,
    )

In [6]:
def train_and_visualize(X, y, num_inducing, title):
    """
    Fit the preference model to (X, y) and wrap the result in an SVGP model.

    Despite the name, no plotting happens here; `title` is accepted but unused.

    :param X: query data passed to train_model_fullcov
    :param y: observations passed to train_model_fullcov
    :param num_inducing: number of inducing points requested from the trainer
    :param title: unused
    :return: (model, inputs, u_mean, inducing_vars), where u_mean and inducing_vars
        are the NumPy values of the trained q_mu and inducing inputs
    """
    # Training runs for a fixed 3000 steps over the global objective bounds
    trained = PBO.models.learning_stochastic.train_model_fullcov(
        X, y,
        num_inducing=num_inducing,
        obj_low=objective_low,
        obj_high=objective_high,
        num_steps=3000)
    # The learned indifference threshold is returned by the trainer but not used here
    variational_mean, variational_sqrt, inducing_points, inputs, kernel, _threshold = trained

    model = PBO.models.learning.init_SVGP_fullcov(
        variational_mean,
        variational_sqrt,
        inducing_points,
        kernel,
        gpflow.likelihoods.Gaussian())

    return model, inputs, variational_mean.numpy(), inducing_points.numpy()

In [7]:
def uniform_grid(input_dims, num_discrete_per_dim, low=0., high=1.):
    """
    Returns an array with every combination of discrete values in input_dims dimensions.

    Each dimension takes num_discrete_per_dim evenly spaced values in [low, high].
    Row i holds the base-num_discrete_per_dim digits of i, so dimension 0 varies
    fastest and the last dimension slowest.

    :param input_dims: int, number of dimensions
    :param num_discrete_per_dim: int, number of grid values along each dimension
    :param low: float, lower bound of every dimension
    :param high: float, upper bound of every dimension
    :return: ndarray of shape (num_discrete_per_dim ** input_dims, input_dims)
    """
    num_points = num_discrete_per_dim ** input_dims
    discrete_points = np.linspace(low, high, num_discrete_per_dim)

    # Vectorized replacement for the per-element Python loop:
    # digit[i, dim] = (i // num_discrete_per_dim**dim) % num_discrete_per_dim
    point_ids = np.arange(num_points, dtype=np.int64)[:, np.newaxis]
    place_values = num_discrete_per_dim ** np.arange(input_dims, dtype=np.int64)
    digits = (point_ids // place_values) % num_discrete_per_dim

    return discrete_points[digits]

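This function provides our main metric for the performance of the acquisition function: the closer the model's best guess is to the global minimum of the objective, the better. (The observations are generated via `objective_get_f_neg`, which appears to negate the objective, so the maximizer of the model's prediction serves as its estimate of the objective's minimizer.)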

In [8]:
def best_guess(model):
    """
    Returns a GP model's best guess of the global maximum of f.

    The guess is the point of the discretized input grid at which the first
    output of model.predict_f (presumably the predictive mean) is largest.
    """
    grid = uniform_grid(input_dims, num_discrete_per_dim, low=objective_low, high=objective_high)
    prediction = model.predict_f(grid)
    mean_on_grid = prediction[0].numpy()
    best_index = np.argmax(mean_on_grid)
    return grid[best_index]

Store the results in these arrays:

In [9]:
# Each run ends with one query per unordered pair of initial points plus one per evaluation
num_initial_pairs = (num_init_points - 1) * num_init_points / 2
num_data_at_end = int(num_initial_pairs + num_evals)

# Pre-allocated per-run storage for the queries, the observations and the per-evaluation best guesses
X_results = np.zeros((num_runs, num_data_at_end, num_choices, input_dims))
y_results = np.zeros((num_runs, num_data_at_end, 1, input_dims))
best_guess_results = np.zeros((num_runs, num_evals, input_dims))

Create the initial values for each run:

In [10]:
# Fixed seed so every execution draws the same initial points
np.random.seed(0)
init_points = np.random.uniform(low=objective_low, high=objective_high, size=[num_runs, num_init_points, input_dims])
num_combs = int((num_init_points-1) * num_init_points / 2)
init_vals = np.zeros([num_runs, num_combs, num_choices, input_dims])

# Index pairs (i, j) with i < j in row-major order: (0, 1), (0, 2), ..., (1, 2), ...
first_of_pair, second_of_pair = np.triu_indices(num_init_points, k=1)

# For every run at once, place the two members of each pair in choice slots 0 and 1
init_vals[:, :, 0] = init_points[:, first_of_pair]
init_vals[:, :, 1] = init_points[:, second_of_pair]

The following loops carry out the Bayesian optimization algorithm over a number of runs, with a fixed number of evaluations per run.

In [11]:
for run in range(num_runs):
    print("Beginning run %s" % (run))

    # Start from this run's initial pairwise queries and their observations
    X = init_vals[run]
    y = get_noisy_observation(X, objective)

    model, inputs, u_mean, inducing_vars = train_and_visualize(X, y, num_inducing_init, "Run_{}:_Initial_model".format(run))

    # The grid search in best_guess is expensive; compute it once per trained model and reuse it
    # as the next incumbent (assumes model.predict_f is deterministic for a fixed model)
    incumbent = best_guess(model)

    for evaluation in range(num_evals):
        print("Beginning evaluation %s" % (evaluation))

        # Get incumbent maximizer
        maximizer = np.expand_dims(incumbent, axis=0)  # (1, input_dims)

        print("Maximizer:")
        print(maximizer)

        # Sample possible next input points. In EI, all queries are a pair with the incumbent maximizer as the
        # first point and a next input point as the second point
        samples = np.random.uniform(low=objective_low,
                                    high=objective_high,
                                    size=(num_samples, input_dims))

        # Calculate EI vals
        ei_vals = PBO.acquisitions.ei.EI(model, maximizer, samples)

        # Select query that maximizes EI
        next_idx = np.argmax(ei_vals)
        next_query = np.zeros((num_choices, input_dims))
        next_query[0, :] = maximizer  # EI only works in binary choices
        next_query[1, :] = samples[next_idx]
        print("Evaluation %s: Next query is %s with EI value of %s" % (evaluation, next_query, ei_vals[next_idx]))

        X = np.concatenate([X, [next_query]])
        # Evaluate objective function
        y = np.concatenate([y, get_noisy_observation(np.expand_dims(next_query, axis=0), objective)], axis=0)

        print("Evaluation %s: Training model" % (evaluation))
        # One additional inducing point is used for every query added so far
        model, inputs, u_mean, inducing_vars = train_and_visualize(X, y,
                                                                   num_inducing_init + evaluation + 1,
                                                                   "Run_{}_Evaluation_{}".format(run, evaluation))

        incumbent = best_guess(model)
        best_guess_results[run, evaluation, :] = incumbent

        # Checkpoint the model parameters; the context manager guarantees the file is
        # flushed and closed even if pickling fails or the loop is interrupted
        model_path = results_dir + "Run_{}_Evaluation_{}_model.p".format(run, evaluation)
        with open(model_path, "wb") as model_file:
            pickle.dump((model.kernel.variance.numpy(),
                         model.kernel.lengthscale.numpy(),
                         model.inducing_variable.Z.numpy(),
                         model.q_mu.numpy(),
                         model.q_sqrt.numpy()),
                        model_file)

    X_results[run] = X
    y_results[run] = y

Beginning run 0
Indifference_threshold is trainable.
Instructions for updating:
Use tf.identity instead.
Negative ELBO at step 0: 7.269339792522033 in 0.2291s
Negative ELBO at step 500: 2.2747047046285003 in 53.2784s
Negative ELBO at step 1000: 2.061834863461045 in 52.1164s
Negative ELBO at step 1500: 2.1425262940705623 in 51.5013s
Negative ELBO at step 2000: 2.0919999830423746 in 60.9488s
Negative ELBO at step 2500: 2.138971449508106 in 60.1255s
Beginning evaluation 0
Maximizer:
[[0.64646465 0.17171717 0.        ]]
Evaluation 0: Next query is [[0.64646465 0.17171717 0.        ]
 [0.6090939  0.18511639 0.00620341]] with EI value of [0.37960182]
Evaluation 0: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 206.06655683262238 in 0.2569s
Negative ELBO at step 500: 10.548103926089214 in 71.9289s
Negative ELBO at step 1000: 5.105079274545194 in 79.4841s
Negative ELBO at step 1500: 3.661707759438784 in 79.9528s
Negative ELBO at step 2000: 3.2896125189383056 in 72

Negative ELBO at step 0: 37544.67349619091 in 0.5672s
Negative ELBO at step 500: 664.3567069109073 in 232.1328s
Negative ELBO at step 1000: 301.39863010834205 in 230.2457s
Negative ELBO at step 1500: 145.28763189990826 in 228.6570s
Negative ELBO at step 2000: 74.62249578707097 in 233.7544s
Negative ELBO at step 2500: 68.78685486041611 in 232.9551s
Beginning evaluation 13
Maximizer:
[[1. 0. 0.]]
Evaluation 13: Next query is [[1.         0.         0.        ]
 [0.9976618  0.22095679 0.13858573]] with EI value of [0.7007696]
Evaluation 13: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 27294.071802858638 in 0.5683s
Negative ELBO at step 500: 502.50854910375364 in 239.9422s
Negative ELBO at step 1000: 163.2015872098046 in 238.2877s
Negative ELBO at step 1500: 84.85222388609695 in 241.6681s
Negative ELBO at step 2000: 59.30275255032723 in 242.3696s
Negative ELBO at step 2500: 36.490532600778444 in 244.4570s
Beginning evaluation 14
Maximizer:
[[1. 0. 0.]]
Evalu

Negative ELBO at step 0: 514.4598049245024 in 0.8897s
Negative ELBO at step 500: 22.592444895413372 in 145.8517s
Negative ELBO at step 1000: 10.974557413974674 in 142.5990s
Negative ELBO at step 1500: 8.602127261543192 in 141.7456s
Negative ELBO at step 2000: 7.551942980243785 in 145.0837s
Negative ELBO at step 2500: 6.706481983487654 in 145.4247s
Beginning evaluation 6
Maximizer:
[[0.8989899  1.         0.02020202]]
Evaluation 6: Next query is [[0.8989899  1.         0.02020202]
 [0.84705056 0.96634746 0.06667913]] with EI value of [0.33015444]
Evaluation 6: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 623.3364552600303 in 0.3633s
Negative ELBO at step 500: 37.060107837549594 in 155.3933s
Negative ELBO at step 1000: 14.756222549280466 in 153.7916s
Negative ELBO at step 1500: 10.489809765388427 in 153.9261s
Negative ELBO at step 2000: 8.388772664340275 in 155.4069s
Negative ELBO at step 2500: 8.120668114440287 in 153.3284s
Beginning evaluation 7
Maximize

Beginning evaluation 19
Maximizer:
[[1.         0.01010101 0.        ]]
Evaluation 19: Next query is [[1.00000000e+00 1.01010101e-02 0.00000000e+00]
 [1.56952430e-01 9.98176846e-01 4.71618399e-04]] with EI value of [0.65566994]
Evaluation 19: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 473126.89661543205 in 1.1258s
Negative ELBO at step 500: 3829.566599337756 in 328.0213s
Negative ELBO at step 1000: 1527.4836654325825 in 332.0931s
Negative ELBO at step 1500: 826.4970399907639 in 330.1468s
Negative ELBO at step 2000: 439.699295572972 in 327.9863s
Negative ELBO at step 2500: 300.68434102601964 in 327.6205s
Beginning run 2
Indifference_threshold is trainable.
Negative ELBO at step 0: 5.638178347464526 in 0.1631s
Negative ELBO at step 500: 2.461086073088487 in 52.8679s
Negative ELBO at step 1000: 2.149235663841378 in 54.6032s
Negative ELBO at step 1500: 2.211927512030171 in 66.9355s
Negative ELBO at step 2000: 2.1685546866507415 in 75.1759s
Negative ELBO at

Negative ELBO at step 1000: 73.25618257882611 in 223.3266s
Negative ELBO at step 1500: 49.157415809221185 in 227.2464s
Negative ELBO at step 2000: 27.020032514690747 in 223.3588s
Negative ELBO at step 2500: 22.438544216488403 in 227.3468s
Beginning evaluation 12
Maximizer:
[[0.         0.14141414 0.        ]]
Evaluation 12: Next query is [[0.         0.14141414 0.        ]
 [0.99091681 0.02937577 0.86832147]] with EI value of [0.46112962]
Evaluation 12: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 21382.294975237684 in 0.8664s
Negative ELBO at step 500: 293.54867427990524 in 233.8367s
Negative ELBO at step 1000: 141.0880535167569 in 232.0635s
Negative ELBO at step 1500: 74.72486459414064 in 237.3929s
Negative ELBO at step 2000: 47.79529408068335 in 234.3175s
Negative ELBO at step 2500: 34.27626707711556 in 238.1157s
Beginning evaluation 13
Maximizer:
[[0.         0.02020202 0.        ]]
Evaluation 13: Next query is [[0.         0.02020202 0.        ]
 [0

Negative ELBO at step 0: 1022.3662501038154 in 0.3256s
Negative ELBO at step 500: 28.39058844766633 in 108.0804s
Negative ELBO at step 1000: 11.805579939726456 in 108.9912s
Negative ELBO at step 1500: 9.378225262732556 in 107.2947s
Negative ELBO at step 2000: 7.1696732132570125 in 107.7509s
Negative ELBO at step 2500: 6.713343782388246 in 108.9588s
Beginning evaluation 5
Maximizer:
[[0.         1.         0.14141414]]
Evaluation 5: Next query is [[0.         1.         0.14141414]
 [0.02784481 0.99651345 0.92535954]] with EI value of [0.32060636]
Evaluation 5: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 251.66429859190495 in 0.3423s
Negative ELBO at step 500: 15.004004887849508 in 118.0179s
Negative ELBO at step 1000: 8.956733805810737 in 120.5694s
Negative ELBO at step 1500: 7.405955762017938 in 119.4722s
Negative ELBO at step 2000: 6.90785134593078 in 118.7116s
Negative ELBO at step 2500: 6.625726077437704 in 119.0716s
Beginning evaluation 6
Maximizer

Beginning evaluation 18
Maximizer:
[[0. 1. 0.]]
Evaluation 18: Next query is [[0.         1.         0.        ]
 [0.01869681 0.97685445 0.21695474]] with EI value of [0.41194973]
Evaluation 18: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 155202.03540824982 in 0.8146s
Negative ELBO at step 500: 2247.265277189916 in 263.0494s
Negative ELBO at step 1000: 885.4022511167387 in 265.0090s
Negative ELBO at step 1500: 489.05967921013774 in 263.8044s
Negative ELBO at step 2000: 278.0996039837689 in 265.7401s
Negative ELBO at step 2500: 175.87791628479047 in 264.6759s
Beginning evaluation 19
Maximizer:
[[0. 1. 0.]]
Evaluation 19: Next query is [[0.         1.         0.        ]
 [0.0929386  0.98976848 0.13551056]] with EI value of [0.7381459]
Evaluation 19: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 232711.64534844932 in 0.8444s
Negative ELBO at step 500: 2704.577751673334 in 277.3472s
Negative ELBO at step 1000: 1290.6066837840

KeyboardInterrupt: 

In [None]:
# Persist the collected queries, observations and best guesses of all runs;
# the context manager closes the file handle once the dump completes
with open(results_dir + "Xybestguess.p", "wb") as results_file:
    pickle.dump((X_results, y_results, best_guess_results), results_file)

In [None]:
def dist(x, y):
    """
    Euclidean distance between x and y, taken over the last axis.

    x and y have shape (..., input_dims) and are combined with the usual
    broadcasting rules; the result drops the last axis.
    """
    delta = x - y
    squared_norm = np.sum(np.square(delta), axis=-1)
    return np.sqrt(squared_norm)

# Locate the objective's minimizer on the same discretized grid that best_guess searches
xx = uniform_grid(input_dims, num_discrete_per_dim, low=objective_low, high=objective_high)
# Keep the whole input_dims-vector of the minimizer. Indexing it with [0] would retain only
# its first coordinate, which dist() would then broadcast across all dimensions.
# (np.argmin returns a flat index; assumes objective(xx) yields one value per grid row)
global_min = xx[np.argmin(objective(xx))]

# Number of queries in the data set after each evaluation (same for every run)
x_axis = list(range(num_combs+1, num_combs+1+num_evals))

# One figure per run: distance from each evaluation's best guess to the grid minimizer
for i in range(best_guess_results.shape[0]):
    diff_from_min = dist(best_guess_results[i], global_min)

    plt.figure(figsize=(12, 6))
    plt.plot(x_axis, diff_from_min, 'kx', mew=2)
    plt.xticks(x_axis)
    plt.xlabel('Evaluations', fontsize=18)
    plt.ylabel('Best guess distance', fontsize=16)
    plt.title("Run %s" % i)
    plt.show()