# Preferential Bayesian Optimization: EI
This notebook demonstrates the use of the Expected Improvement (EI) acquisition function on ordinal (preference) data.

Formulation by Nguyen Quoc Phong.

In [1]:
import numpy as np
import gpflow
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
import os
import pickle

from gpflow.utilities import set_trainable, print_summary
gpflow.config.set_default_summary_fmt("notebook")

sys.path.append(os.path.split(os.path.split(os.path.split(os.getcwd())[0])[0])[0]) # Move 3 levels up directory to import PBO
import PBO

In [2]:
objective = PBO.objectives.hartmann3d
objective_low = 0
objective_high = 1.
objective_name = "Hart3"
acquisition_name = "EI"
experiment_name = "PBO" + "_" + acquisition_name + "_" + objective_name

In [3]:
num_runs = 20
num_evals = 20
num_samples = 1000
num_choices = 2
input_dims = 3
num_maximizers = 20
num_init_points = 3
num_inducing_init = 3
num_discrete_per_dim = 100 # Discretization of continuous input space

In [4]:
results_dir = os.getcwd() + '/results/' + experiment_name + '/'

try:
    # Create target Directory
    os.makedirs(results_dir)
    print("Directory " , results_dir ,  " created ") 
except FileExistsError:
    print("Directory " , results_dir ,  " already exists")

Directory  /home/sebtsh/PBO/notebooks/EI/results/PBO_EI_Hart3/  created 


In [5]:
def get_noisy_observation(X, objective):
    f = PBO.objectives.objective_get_f_neg(X, objective)
    return PBO.observation_model.gen_observation_from_f(X, f, 1)

In [6]:
def train_and_visualize(X, y, num_inducing, title):
    
    # Train model with data
    q_mu, q_sqrt, u, inputs, k, indifference_threshold = PBO.models.learning_stochastic.train_model_fullcov(X, y, 
                                                                         num_inducing=num_inducing,
                                                                         obj_low=objective_low,
                                                                         obj_high=objective_high,
                                                                         num_steps=3000)
    likelihood = gpflow.likelihoods.Gaussian()
    model = PBO.models.learning.init_SVGP_fullcov(q_mu, q_sqrt, u, k, likelihood)
    u_mean = q_mu.numpy()
    inducing_vars = u.numpy()
    
    return model, inputs, u_mean, inducing_vars

In [7]:
def uniform_grid(input_dims, num_discrete_per_dim, low=0., high=1.):
    """
    Returns an array with all possible permutations of discrete values in input_dims number of dimensions.
    :param input_dims: int
    :param num_discrete_per_dim: int
    :param low: int
    :param high: int
    :return: tensor of shape (num_discrete_per_dim ** input_dims, input_dims)
    """
    num_points = num_discrete_per_dim ** input_dims
    out = np.zeros([num_points, input_dims])
    discrete_points = np.linspace(low, high, num_discrete_per_dim)
    for i in range(num_points):
        for dim in range(input_dims):
            val = num_discrete_per_dim ** (dim)
            out[i, dim] = discrete_points[int((i // val) % num_discrete_per_dim)]
    return out

This function is our main metric for the performance of the acquisition function: The closer the model's best guess to the global minimum, the better.

In [8]:
def best_guess(model):
    """
    Returns a GP model's best guess of the global maximum of f.
    """
    xx = uniform_grid(input_dims, num_discrete_per_dim, low=objective_low, high=objective_high)
    res = model.predict_f(xx)[0].numpy()
    return xx[np.argmax(res)]

Store the results in these arrays:

In [9]:
num_data_at_end = int((num_init_points-1) * num_init_points / 2 + num_evals)
X_results = np.zeros([num_runs, num_data_at_end, num_choices, input_dims])
y_results = np.zeros([num_runs, num_data_at_end, 1, input_dims])
best_guess_results = np.zeros([num_runs, num_evals, input_dims])

In [12]:
y_results = np.zeros([num_runs, num_data_at_end, 1, input_dims])  # REMOVE LATER

In [13]:
X_results[0] = X
y_results[0] = y

Create the initial values for each run:

In [10]:
np.random.seed(0)
init_points = np.random.uniform(low=objective_low, high=objective_high, size=[num_runs, num_init_points, input_dims])
num_combs = int((num_init_points-1) * num_init_points / 2)
init_vals = np.zeros([num_runs, num_combs, num_choices, input_dims])
for run in range(num_runs):
    cur_idx = 0
    for init_point in range(num_init_points-1):
        for next_point in range(init_point+1, num_init_points):
            init_vals[run, cur_idx, 0] = init_points[run, init_point]
            init_vals[run, cur_idx, 1] = init_points[run, next_point]
            cur_idx += 1

The following loops carry out the Bayesian optimization algorithm over a number of runs, with a fixed number of evaluations per run.

In [15]:
for run in range(1, num_runs):
    print("Beginning run %s" % (run))
    
    X = init_vals[run]
    y = get_noisy_observation(X, objective)
    
    model, inputs, u_mean, inducing_vars = train_and_visualize(X, y, num_inducing_init, "Run_{}:_Initial_model".format(run))

    for evaluation in range(num_evals):
        print("Beginning evaluation %s" % (evaluation)) 

        # Get incumbent maximizer
        maximizer = np.expand_dims(best_guess(model), axis=0)  # (1, input_dims)
        
        print("Maximizer:")
        print(maximizer)
        
        # Sample possible next input points. In EI, all queries are a pair with the incumbent maximizer as the 
        # first point and a next input point as the second point
        
        samples = np.random.uniform(low=objective_low,
                                    high=objective_high,
                                    size=(num_samples, input_dims))
        
        # Calculate EI vals
        ei_vals = PBO.acquisitions.ei.EI(model, maximizer, samples)
        
        # Select query that maximizes EI
        next_idx = np.argmax(ei_vals)
        next_query = np.zeros((num_choices, input_dims))
        next_query[0, :] = maximizer  # EI only works in binary choices
        next_query[1, :] = samples[next_idx]
        print("Evaluation %s: Next query is %s with EI value of %s" % (evaluation, next_query, ei_vals[next_idx]))

        X = np.concatenate([X, [next_query]])
        # Evaluate objective function
        y = np.concatenate([y, get_noisy_observation(np.expand_dims(next_query, axis=0), objective)], axis=0)
        
        print("Evaluation %s: Training model" % (evaluation))
        model, inputs, u_mean, inducing_vars = train_and_visualize(X, y, 
                                                                   num_inducing_init + evaluation + 1, 
                                                                   "Run_{}_Evaluation_{}".format(run, evaluation))

        best_guess_results[run, evaluation, :] = best_guess(model)
        
        pickle.dump((model.kernel.variance.numpy(), 
                     model.kernel.lengthscale.numpy(),
                     model.inducing_variable.Z.numpy(), 
                     model.q_mu.numpy(), 
                     model.q_sqrt.numpy()), 
                    open(results_dir + "Run_{}_Evaluation_{}_model.p".format(run, evaluation), "wb"))

    X_results[run] = X
    y_results[run] = y

Beginning run 1
Indifference_threshold is trainable.
Negative ELBO at step 0: 17.61162698744452 in 0.1617s
Negative ELBO at step 500: 2.691590149473259 in 49.1487s
Negative ELBO at step 1000: 2.256588760208899 in 50.6158s
Negative ELBO at step 1500: 2.207094527697997 in 52.1038s
Negative ELBO at step 2000: 2.0786714979930876 in 50.7919s
Negative ELBO at step 2500: 2.0539095958825944 in 52.0857s
Beginning evaluation 0
Maximizer:
[[0.56565657 1.         0.38383838]]
Evaluation 0: Next query is [[0.56565657 1.         0.38383838]
 [0.50918509 0.9826912  0.40495312]] with EI value of [0.37292236]
Evaluation 0: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 12.558429402954927 in 0.1720s
Negative ELBO at step 500: 3.0835059611427194 in 61.6494s
Negative ELBO at step 1000: 2.834242296873967 in 62.8874s
Negative ELBO at step 1500: 2.71171377963912 in 62.4205s
Negative ELBO at step 2000: 2.7383506787215213 in 62.6384s
Negative ELBO at step 2500: 2.7656101972225753 

Negative ELBO at step 1500: 55.498303603528704 in 194.8208s
Negative ELBO at step 2000: 31.36739828197857 in 192.5573s
Negative ELBO at step 2500: 25.895956012738566 in 194.1972s
Beginning evaluation 13
Maximizer:
[[1.         0.         0.17171717]]
Evaluation 13: Next query is [[1.         0.         0.17171717]
 [0.99632519 0.05448062 0.95658966]] with EI value of [0.43405732]
Evaluation 13: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 829360.4983643367 in 0.5432s
Negative ELBO at step 500: 8076.440096182777 in 204.1441s
Negative ELBO at step 1000: 3095.008116798073 in 206.0096s
Negative ELBO at step 1500: 1726.1018536209988 in 209.8674s
Negative ELBO at step 2000: 936.4581822847244 in 205.1465s
Negative ELBO at step 2500: 624.0143531384342 in 205.5991s
Beginning evaluation 14
Maximizer:
[[1. 0. 0.]]
Evaluation 14: Next query is [[1.         0.         0.        ]
 [0.84960956 0.99160966 0.05493101]] with EI value of [2.04474917]
Evaluation 14: Traini

Negative ELBO at step 0: 449.3282653481387 in 0.6061s
Negative ELBO at step 500: 22.542818907109556 in 118.8758s
Negative ELBO at step 1000: 9.68080837401631 in 120.2051s
Negative ELBO at step 1500: 7.672679195372485 in 118.3243s
Negative ELBO at step 2000: 7.32386326018321 in 117.4764s
Negative ELBO at step 2500: 7.198002656419273 in 117.8514s
Beginning evaluation 6
Maximizer:
[[0.         0.         0.17171717]]
Evaluation 6: Next query is [[0.00000000e+00 0.00000000e+00 1.71717172e-01]
 [1.13898532e-02 2.82572460e-04 2.89672828e-01]] with EI value of [0.3441612]
Evaluation 6: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 2995.922542761678 in 0.3745s
Negative ELBO at step 500: 90.92521982434562 in 130.5748s
Negative ELBO at step 1000: 29.197640000223082 in 127.4217s
Negative ELBO at step 1500: 19.637795058537847 in 131.0978s
Negative ELBO at step 2000: 12.93609824661661 in 130.0365s
Negative ELBO at step 2500: 11.95327325443612 in 127.5708s
Beginning ev

Beginning evaluation 19
Maximizer:
[[0. 0. 0.]]
Evaluation 19: Next query is [[0.         0.         0.        ]
 [0.94650979 0.91137236 0.98827787]] with EI value of [1.04625545]
Evaluation 19: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 464584.3476314632 in 0.6850s
Negative ELBO at step 500: 6368.611380100547 in 271.4192s
Negative ELBO at step 1000: 2440.7331431648545 in 272.0162s
Negative ELBO at step 1500: 1689.8697971180343 in 331.9129s
Negative ELBO at step 2000: 874.2390735919276 in 336.5660s
Negative ELBO at step 2500: 555.1502217610322 in 329.6064s
Beginning run 3
Indifference_threshold is trainable.
Negative ELBO at step 0: 26.77291117389093 in 0.1440s
Negative ELBO at step 500: 2.748689558758493 in 59.0626s
Negative ELBO at step 1000: 2.387556752959749 in 73.0517s
Negative ELBO at step 1500: 2.1172489073279364 in 66.1184s
Negative ELBO at step 2000: 2.182423786118708 in 67.6899s
Negative ELBO at step 2500: 2.0309959205793557 in 66.8035s
Begin

Negative ELBO at step 2000: 27.7914313081489 in 218.2531s
Negative ELBO at step 2500: 24.30672559679422 in 223.5750s
Beginning evaluation 12
Maximizer:
[[0. 1. 0.]]
Evaluation 12: Next query is [[0.         1.         0.        ]
 [0.33520549 0.97541066 0.00201947]] with EI value of [0.28440302]
Evaluation 12: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 24858.70430509987 in 1.3157s
Negative ELBO at step 500: 515.9088156711155 in 243.8634s
Negative ELBO at step 1000: 222.4430884773429 in 239.8330s
Negative ELBO at step 1500: 111.17945034294696 in 243.0379s
Negative ELBO at step 2000: 71.75335106807535 in 251.8784s
Negative ELBO at step 2500: 55.18411946387601 in 239.2810s
Beginning evaluation 13
Maximizer:
[[0. 1. 0.]]
Evaluation 13: Next query is [[0.         1.         0.        ]
 [0.0317121  0.11769334 0.97499435]] with EI value of [0.58149877]
Evaluation 13: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 9651.1351466846

Negative ELBO at step 2500: 6.131284768868955 in 105.0741s
Beginning evaluation 5
Maximizer:
[[1. 1. 1.]]
Evaluation 5: Next query is [[1.         1.         1.        ]
 [0.99118047 0.89970487 0.94364595]] with EI value of [0.28306042]
Evaluation 5: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 562.6348146483714 in 0.3461s
Negative ELBO at step 500: 22.888736597691448 in 116.9350s
Negative ELBO at step 1000: 9.342711402886891 in 119.5687s
Negative ELBO at step 1500: 7.644829208009462 in 116.1157s
Negative ELBO at step 2000: 6.785954817897891 in 117.2862s
Negative ELBO at step 2500: 7.019258111007579 in 118.9229s
Beginning evaluation 6
Maximizer:
[[0.87878788 0.84848485 1.        ]]
Evaluation 6: Next query is [[0.87878788 0.84848485 1.        ]
 [0.91601679 0.8988941  0.92681297]] with EI value of [0.33751682]
Evaluation 6: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 659.5490914352929 in 0.3323s
Negative ELBO at step 500:

Negative ELBO at step 0: 215815.43256116952 in 0.6917s
Negative ELBO at step 500: 4169.264165356263 in 261.7873s
Negative ELBO at step 1000: 1437.2568668916788 in 262.2184s
Negative ELBO at step 1500: 855.258317338011 in 263.8599s
Negative ELBO at step 2000: 518.1259739986968 in 262.0791s
Negative ELBO at step 2500: 357.45157644924285 in 261.2371s
Beginning evaluation 19
Maximizer:
[[0. 1. 0.]]
Evaluation 19: Next query is [[0.         1.         0.        ]
 [0.27957858 0.04471364 0.0054738 ]] with EI value of [0.75152699]
Evaluation 19: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 376097.3356218744 in 0.7820s
Negative ELBO at step 500: 4077.2047542652144 in 271.2680s
Negative ELBO at step 1000: 1739.4233321827276 in 271.3443s
Negative ELBO at step 1500: 949.2174226955412 in 273.2586s
Negative ELBO at step 2000: 491.2158989257434 in 273.6743s
Negative ELBO at step 2500: 361.1963374355493 in 274.9306s
Beginning run 5
Indifference_threshold is trainable.


Negative ELBO at step 0: 16193.096500834414 in 0.4760s
Negative ELBO at step 500: 254.84768202582444 in 182.4748s
Negative ELBO at step 1000: 106.53321145024996 in 186.3095s
Negative ELBO at step 1500: 63.71698338465899 in 184.0300s


InvalidArgumentError: Input matrix is not invertible. [Op:MatrixTriangularSolve]

In [None]:
model

In [23]:
run

5

In [21]:
q_mu, q_sqrt, u, inputs, k, indifference_threshold = train_model_fullcov(X, y, 
                                                                         num_inducing=num_inducing_init + evaluation + 1,
                                                                         obj_low=objective_low,
                                                                         obj_high=objective_high,
                                                                         num_steps=3000)

Indifference_threshold is trainable.
Negative ELBO at step 0: 11510.962587647811 in 0.5924s
Negative ELBO at step 500: 277.2154424164847 in 184.2947s
Negative ELBO at step 1000: 94.97496947874512 in 187.1631s
Negative ELBO at step 1500: 57.367661811113976 in 187.3401s
Negative ELBO at step 2000: 33.90685986180857 in 186.6289s
Negative ELBO at step 2500: 27.572881712780564 in 188.1012s


In [22]:
print_summary(k)

name,class,transform,prior,trainable,shape,dtype,value
SquaredExponential.variance,Parameter,Softplus,,True,(),float64,1.2037156742358075
SquaredExponential.lengthscale,Parameter,Softplus,,True,"(3,)",float64,[0.82366564 0.82809989 0.81827668]


In [24]:
print_summary(model)

name,class,transform,prior,trainable,shape,dtype,value
SVGP.kernel.variance,Parameter,Softplus,,True,(),float64,1.1749471700546303
SVGP.kernel.lengthscale,Parameter,Softplus,,True,"(3,)",float64,[0.86938922 0.84125363 0.86010575]
SVGP.likelihood.variance,Parameter,Softplus + Shift,,True,(),float64,0.9999999999999999
SVGP.inducing_variable.Z,Parameter,,,False,"(14, 3)",float64,"[[0.46261772, 0.6743687, 0.36129394..."
SVGP.q_mu,Parameter,,,False,"(14, 1)",float64,[[3.67998729e-02...
SVGP.q_sqrt,Parameter,FillTriangular,,False,"(1, 14, 14)",float64,"[[[0.50164886, 0., 0...."


In [25]:
model.kernel.lengthscale.transform = gpflow.utilities.bijectors.positive(lower=gpflow.default_jitter())

In [26]:
print_summary(model)

name,class,transform,prior,trainable,shape,dtype,value
SVGP.kernel.variance,Parameter,Softplus,,True,(),float64,1.1749471700546303
SVGP.kernel.lengthscale,Parameter,Softplus + Shift,,True,"(3,)",float64,[0.86938922 0.84125363 0.86010575]
SVGP.likelihood.variance,Parameter,Softplus + Shift,,True,(),float64,0.9999999999999999
SVGP.inducing_variable.Z,Parameter,,,False,"(14, 3)",float64,"[[0.46261772, 0.6743687, 0.36129394..."
SVGP.q_mu,Parameter,,,False,"(14, 1)",float64,[[3.67998729e-02...
SVGP.q_sqrt,Parameter,FillTriangular,,False,"(1, 14, 14)",float64,"[[[0.50164886, 0., 0...."


In [None]:
pickle.dump((X_results, y_results, best_guess_results), open(results_dir + "Xybestguess.p", "wb"))

In [None]:
def dist(x, y):
    """
    x and y have shape (..., input_dims)
    """
    return np.sqrt(np.sum((x - y) * (x - y), axis=-1))

xx = uniform_grid(input_dims, num_discrete_per_dim, low=objective_low, high=objective_high)
global_min = xx[np.argmin(objective(xx))][0]

for i in range(best_guess_results.shape[0]):
    diff_from_min = dist(best_guess_results[i], global_min)
    
    x_axis = list(range(num_combs+1, num_combs+1+num_evals))
    plt.figure(figsize=(12, 6))
    plt.plot(x_axis, diff_from_min, 'kx', mew=2)
    plt.xticks(x_axis)
    plt.xlabel('Evaluations', fontsize=18)
    plt.ylabel('Best guess distance', fontsize=16)
    plt.title("Run %s" % i)
    plt.show()