# Preferential Bayesian Optimization: RANDOM
This notebook demonstrates the use of random querying on ordinal (preference) data.

Formulation by Nguyen Quoc Phong.

In [1]:
import numpy as np
import gpflow
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
import os
import pickle

from gpflow.utilities import set_trainable, print_summary
gpflow.config.set_default_summary_fmt("notebook")

sys.path.append(os.path.split(os.path.split(os.path.split(os.getcwd())[0])[0])[0]) # Move 3 levels up directory to import PBO
import PBO

In [2]:
objective = PBO.objectives.hartmann3d
objective_low = 0
objective_high = 1.
objective_name = "Hart3"
acquisition_name = "RANDOM"
experiment_name = "PBO" + "_" + acquisition_name + "_" + objective_name

In [3]:
num_runs = 20
num_evals = 20
num_samples = 1000
num_choices = 2
input_dims = 3
num_maximizers = 20
num_init_points = 3
num_inducing_init = 3
num_discrete_per_dim = 100 # Discretization of continuous input space

In [4]:
results_dir = os.getcwd() + '/results/' + experiment_name + '/'

try:
    # Create target Directory
    os.makedirs(results_dir)
    print("Directory " , results_dir ,  " created ") 
except FileExistsError:
    print("Directory " , results_dir ,  " already exists")

Directory  /home/sebtsh/PBO/notebooks/RANDOM/results/PBO_RANDOM_Hart3/  created 


In [5]:
def get_noisy_observation(X, objective):
    f = PBO.objectives.objective_get_f_neg(X, objective)
    return PBO.observation_model.gen_observation_from_f(X, f, 1)

In [6]:
def train_and_visualize(X, y, num_inducing, title):
    
    # Train model with data
    q_mu, q_sqrt, u, inputs, k, indifference_threshold = PBO.models.learning_stochastic.train_model_fullcov(X, y, 
                                                                         num_inducing=num_inducing,
                                                                         obj_low=objective_low,
                                                                         obj_high=objective_high,
                                                                         num_steps=3000)
    likelihood = gpflow.likelihoods.Gaussian()
    model = PBO.models.learning.init_SVGP_fullcov(q_mu, q_sqrt, u, k, likelihood)
    u_mean = q_mu.numpy()
    inducing_vars = u.numpy()
    
    return model, inputs, u_mean, inducing_vars

In [7]:
def uniform_grid(input_dims, num_discrete_per_dim, low=0., high=1.):
    """
    Returns an array with all possible permutations of discrete values in input_dims number of dimensions.
    :param input_dims: int
    :param num_discrete_per_dim: int
    :param low: int
    :param high: int
    :return: tensor of shape (num_discrete_per_dim ** input_dims, input_dims)
    """
    num_points = num_discrete_per_dim ** input_dims
    out = np.zeros([num_points, input_dims])
    discrete_points = np.linspace(low, high, num_discrete_per_dim)
    for i in range(num_points):
        for dim in range(input_dims):
            val = num_discrete_per_dim ** (dim)
            out[i, dim] = discrete_points[int((i // val) % num_discrete_per_dim)]
    return out

This function is our main metric for the performance of the acquisition function: The closer the model's best guess to the global minimum, the better.

In [8]:
def best_guess(model):
    """
    Returns a GP model's best guess of the global maximum of f.
    """
    xx = uniform_grid(input_dims, num_discrete_per_dim, low=objective_low, high=objective_high)
    res = model.predict_f(xx)[0].numpy()
    return xx[np.argmax(res)]

Store the results in these arrays:

In [9]:
num_data_at_end = int((num_init_points-1) * num_init_points / 2 + num_evals)
X_results = np.zeros([num_runs, num_data_at_end, num_choices, input_dims])
y_results = np.zeros([num_runs, num_data_at_end, 1, input_dims])
best_guess_results = np.zeros([num_runs, num_evals, input_dims])

In [12]:
y_results = np.zeros([num_runs, num_data_at_end, 1, input_dims])  # REMOVE LATER

In [13]:
X_results[0] = X
y_results[0] = y

Create the initial values for each run:

In [10]:
np.random.seed(0)
init_points = np.random.uniform(low=objective_low, high=objective_high, size=[num_runs, num_init_points, input_dims])
num_combs = int((num_init_points-1) * num_init_points / 2)
init_vals = np.zeros([num_runs, num_combs, num_choices, input_dims])
for run in range(num_runs):
    cur_idx = 0
    for init_point in range(num_init_points-1):
        for next_point in range(init_point+1, num_init_points):
            init_vals[run, cur_idx, 0] = init_points[run, init_point]
            init_vals[run, cur_idx, 1] = init_points[run, next_point]
            cur_idx += 1

The following loops carry out the Bayesian optimization algorithm over a number of runs, with a fixed number of evaluations per run.

In [None]:
for run in range(1, num_runs):
    print("Beginning run %s" % (run))
    
    X = init_vals[run]
    y = get_noisy_observation(X, objective)
    
    model, inputs, u_mean, inducing_vars = train_and_visualize(X, y, num_inducing_init, "Run_{}:_Initial_model".format(run))

    for evaluation in range(num_evals):
        print("Beginning evaluation %s" % (evaluation)) 
        
        existing_idx = np.random.randint(0, inputs.shape[0])
        existing_input = inputs[existing_idx]
        random_input = np.random.uniform(low=objective_low, 
                                         high=objective_high, 
                                         size=(1, input_dims))
        
        next_query = np.zeros((num_choices, input_dims))
        next_query[0, :] = existing_input
        next_query[1, :] = random_input
        print("Evaluation %s: Next query is %s" % (evaluation, next_query))

        X = np.concatenate([X, [next_query]])
        # Evaluate objective function
        y = np.concatenate([y, get_noisy_observation(np.expand_dims(next_query, axis=0), objective)], axis=0)
        
        print("Evaluation %s: Training model" % (evaluation))
        model, inputs, u_mean, inducing_vars = train_and_visualize(X, y, 
                                                                   num_inducing_init + evaluation + 1, 
                                                                   "Run_{}_Evaluation_{}".format(run, evaluation))

        best_guess_results[run, evaluation, :] = best_guess(model)

    X_results[run] = X
    y_results[run] = y

Beginning run 1
Indifference_threshold is trainable.
Negative ELBO at step 0: 4.779772578379597 in 0.1606s
Negative ELBO at step 500: 2.0156255224296142 in 50.9795s
Negative ELBO at step 1000: 1.9257136299962938 in 51.4875s
Negative ELBO at step 1500: 1.8556707630674003 in 50.7842s
Negative ELBO at step 2000: 1.8467978810640415 in 51.2707s
Negative ELBO at step 2500: 1.8371685569263123 in 50.9255s
Beginning evaluation 0
Evaluation 0: Next query is [[0.38344152 0.79172504 0.52889492]
 [0.45841243 0.48503053 0.89551293]]
Evaluation 0: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 18.10444828740892 in 0.2020s
Negative ELBO at step 500: 3.138039001477157 in 61.7645s
Negative ELBO at step 1000: 2.917865595200535 in 64.5104s
Negative ELBO at step 1500: 2.836159043271749 in 62.6303s
Negative ELBO at step 2000: 2.7945133416450836 in 61.2656s
Negative ELBO at step 2500: 2.732047502994801 in 63.2500s
Beginning evaluation 1
Evaluation 1: Next query is [[0.0871293  0

Negative ELBO at step 500: 1230.1617191024911 in 216.4630s
Negative ELBO at step 1000: 530.6063666394657 in 215.1507s
Negative ELBO at step 1500: 343.5894307816746 in 217.7420s
Negative ELBO at step 2000: 237.85726811765895 in 214.5110s
Negative ELBO at step 2500: 184.55130948860315 in 212.3838s
Beginning evaluation 15
Evaluation 15: Next query is [[0.36322396 0.2467724  0.4935906 ]
 [0.98738334 0.78745161 0.49138007]]
Evaluation 15: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 91880.3426559101 in 0.7468s
Negative ELBO at step 500: 1608.055362915929 in 224.5526s
Negative ELBO at step 1000: 449.5522871478969 in 225.8814s
Negative ELBO at step 1500: 253.88022917892002 in 228.5586s
Negative ELBO at step 2000: 158.43576088290246 in 229.7877s
Negative ELBO at step 2500: 109.7559888100703 in 224.8064s
Beginning evaluation 16
Evaluation 16: Next query is [[0.0871293  0.0202184  0.83261985]
 [0.52522331 0.04001707 0.59096908]]
Evaluation 16: Training model
Indif

Beginning evaluation 9
Evaluation 9: Next query is [[0.55899164 0.49769535 0.95750981]
 [0.18870537 0.87368075 0.72288922]]
Evaluation 9: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 2833.37943590122 in 0.4011s
Negative ELBO at step 500: 68.31827816708048 in 162.2782s
Negative ELBO at step 1000: 32.13313876350563 in 165.0028s
Negative ELBO at step 1500: 20.01146066620774 in 163.6623s
Negative ELBO at step 2000: 16.13313933788129 in 161.2956s
Negative ELBO at step 2500: 12.182129954937384 in 164.9241s
Beginning evaluation 10
Evaluation 10: Next query is [[0.69597422 0.33277685 0.09545662]
 [0.28646463 0.50159147 0.87941746]]
Evaluation 10: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 43799.55631785604 in 0.5079s
Negative ELBO at step 500: 483.51863519048896 in 170.0344s
Negative ELBO at step 1000: 186.17201131584508 in 174.0138s
Negative ELBO at step 1500: 96.89366343673052 in 170.3564s
Negative ELBO at step 2500: 48.867678

Negative ELBO at step 1000: 5.236924555712864 in 110.7002s
Negative ELBO at step 1500: 4.790246628615833 in 110.7368s
Negative ELBO at step 2000: 4.85970468501439 in 113.7009s
Negative ELBO at step 2500: 4.72076799455319 in 114.6326s
Beginning evaluation 4
Evaluation 4: Next query is [[0.94466892 0.52184832 0.41466194]
 [0.01032792 0.67777514 0.17808517]]
Evaluation 4: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 699.1266493639813 in 0.7335s
Negative ELBO at step 500: 21.271094289119073 in 128.8915s
Negative ELBO at step 1000: 10.875690584204808 in 129.4869s
Negative ELBO at step 1500: 9.09320787372588 in 131.5051s
Negative ELBO at step 2000: 6.503829298926016 in 126.0047s
Negative ELBO at step 2500: 6.031148115022983 in 124.2356s
Beginning evaluation 5
Evaluation 5: Next query is [[0.94466892 0.52184832 0.41466194]
 [0.93686283 0.77386297 0.40563389]]
Evaluation 5: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 971.29289923

Negative ELBO at step 0: 356913.7872141588 in 0.9475s
Negative ELBO at step 500: 4368.258640796421 in 270.4691s
Negative ELBO at step 1000: 2098.703499444171 in 270.3396s
Negative ELBO at step 1500: 935.3913009060619 in 271.9633s
Negative ELBO at step 2000: 579.7192269254251 in 271.3851s
Negative ELBO at step 2500: 395.4660821472497 in 268.7728s
Beginning run 4
Indifference_threshold is trainable.
Negative ELBO at step 0: 186.08960630251428 in 0.1376s
Negative ELBO at step 500: 4.398323705091533 in 52.1526s
Negative ELBO at step 1000: 3.3818472764326115 in 51.3150s
Negative ELBO at step 1500: 2.990324374509067 in 52.0631s
Negative ELBO at step 2000: 2.3944261570928074 in 50.5624s
Negative ELBO at step 2500: 2.2759111816437767 in 52.2584s
Beginning evaluation 0
Evaluation 0: Next query is [[0.61209572 0.616934   0.94374808]
 [0.4155887  0.9074198  0.49630736]]
Evaluation 0: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 11.778647096891607 in 0.2024s
Negativ

Negative ELBO at step 2500: 59.044524047746734 in 204.0126s
Beginning evaluation 14
Evaluation 14: Next query is [[0.1058747  0.88811617 0.50349775]
 [0.90677455 0.29565155 0.64990036]]
Evaluation 14: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 166280.43658862618 in 0.5747s
Negative ELBO at step 500: 2227.0167541702203 in 220.3471s
Negative ELBO at step 1000: 781.8022480480076 in 213.6443s
Negative ELBO at step 1500: 306.327854215858 in 217.0813s
Negative ELBO at step 2000: 318.92262262378244 in 214.6636s
Negative ELBO at step 2500: 168.89857567748123 in 215.8775s
Beginning evaluation 15
Evaluation 15: Next query is [[0.6976312  0.06022547 0.66676672]
 [0.67428712 0.04974564 0.71965364]]
Evaluation 15: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 66949.25796476145 in 0.6170s
Negative ELBO at step 500: 1027.6511544768146 in 225.6830s
Negative ELBO at step 1000: 525.2739779790202 in 224.9574s
Negative ELBO at step 1500: 263

Negative ELBO at step 0: 1511.9685555462063 in 0.3587s
Negative ELBO at step 500: 40.14852620599396 in 150.5049s
Negative ELBO at step 1000: 19.55244004265611 in 152.4193s
Negative ELBO at step 1500: 13.806800107014926 in 148.2280s
Negative ELBO at step 2000: 10.865960703568964 in 149.3646s
Negative ELBO at step 2500: 10.095381419808902 in 148.6182s
Beginning evaluation 9
Evaluation 9: Next query is [[0.34765174 0.16888337 0.16242146]
 [0.69624826 0.61328558 0.4861621 ]]
Evaluation 9: Training model
Indifference_threshold is trainable.
Negative ELBO at step 0: 4054.799265648625 in 0.4154s
Negative ELBO at step 500: 113.77472241129777 in 160.6445s
Negative ELBO at step 1000: 53.57979955382428 in 160.3073s
Negative ELBO at step 1500: 27.335887483870664 in 160.8895s
Negative ELBO at step 2000: 18.85403680747641 in 165.2240s
Negative ELBO at step 2500: 15.805386153067184 in 161.4523s
Beginning evaluation 10
Evaluation 10: Next query is [[0.30730966 0.50981511 0.43438855]
 [0.40902322 0.882

In [None]:
print_summary(model)

In [None]:
pickle.dump((X_results, y_results, best_guess_results), open(results_dir + "Xybestguess.p", "wb"))

In [None]:
def dist(x, y):
    """
    x and y have shape (..., input_dims)
    """
    return np.sqrt(np.sum((x - y) * (x - y), axis=-1))

xx = uniform_grid(input_dims, num_discrete_per_dim, low=objective_low, high=objective_high)
global_min = xx[np.argmin(objective(xx))][0]

for i in range(best_guess_results.shape[0]):
    diff_from_min = dist(best_guess_results[i], global_min)
    
    x_axis = list(range(num_combs+1, num_combs+1+num_evals))
    plt.figure(figsize=(12, 6))
    plt.plot(x_axis, diff_from_min, 'kx', mew=2)
    plt.xticks(x_axis)
    plt.xlabel('Evaluations', fontsize=18)
    plt.ylabel('Best guess distance', fontsize=16)
    plt.title("Run %s" % i)
    plt.show()