# Preferential Bayesian Optimization: Multinomial Predictive Entropy Search

In [1]:
import numpy as np
import gpflow
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
import sys
import os
import pickle

from gpflow.utilities import set_trainable, print_summary
gpflow.config.set_default_summary_fmt("notebook")

sys.path.append(os.path.split(os.path.split(os.path.split(os.getcwd())[0])[0])[0]) # Move 3 levels up directory to import PBO
import PBO

In [2]:
gpu_to_use = 0

# Query the physical GPU list once and reuse it for both the report and the setup.
gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))

if gpus:
    # Turn on memory growth for every GPU, then make only gpus[gpu_to_use] visible.
    try:
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        tf.config.experimental.set_visible_devices(gpus[gpu_to_use], 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as err:
        # Visible devices must be set before GPUs have been initialized
        print(err)

Num GPUs Available:  1
1 Physical GPUs, 1 Logical GPU


In [3]:
# Load the sushi features; the context manager closes the file handle as soon
# as unpickling finishes instead of leaving it to the garbage collector.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open("sushi_features.p", "rb") as features_file:
    features = pickle.load(features_file)

In [4]:
# Load the objective values (indexed in parallel with `features` when the
# lookup dictionary is built); the context manager closes the file handle promptly.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open("fvals.p", "rb") as fvals_file:
    fvals = pickle.load(fvals_file)

In [5]:
# construct dict: raw bytes of each feature row -> the objective value at the same index
feat_to_fval_dict = {
    row.data.tobytes(): fvals[idx]
    for idx, row in enumerate(features)
}

In [6]:
def objective(x):
    """Evaluate PBO.objectives.sushi at x, passing the feature-bytes -> value lookup table."""
    return PBO.objectives.sushi(x, feat_to_fval_dict)


# Bounds of the input domain, taken over all entries of the feature array.
objective_low = np.min(features)
objective_high = np.max(features)

objective_name = "SUSHI"
acquisition_name = "MPES"
experiment_name = "PBO_{}_{}v1".format(acquisition_name, objective_name)

In [7]:
# Experiment configuration
num_runs = 1                 # independent optimization runs
num_evals = 35               # acquisition-driven queries per run
num_choices = 2              # inputs per preference query
input_dims = 6               # size of the last axis of each input
num_maximizers = 20          # passed as `count` to sample_maximizers
num_maximizers_init = 50     # passed as `n_init` to sample_maximizers
num_fourier_features = 1000  # passed as `D` to sample_maximizers
num_init_prefs = 6           # randomly drawn initial queries per run

In [8]:
# Per-experiment output folder. The trailing slash matters: file names are
# appended to this string directly when results are saved.
results_dir = "{}/results/{}/".format(os.getcwd(), experiment_name)

try:
    # Create target Directory
    os.makedirs(results_dir)
except FileExistsError:
    print("Directory " , results_dir ,  " already exists")
else:
    print("Directory " , results_dir ,  " created ")

Directory  /home/seb/PBO/notebooks/SUSHI/results/PBO_MPES_SUSHIv1/  already exists


In [9]:
def get_noisy_observation(X, objective):
    """
    Produce an observation for the queries in X.

    :param X: queries to evaluate
    :param objective: objective function, forwarded to objective_get_f_neg
    :return: the result of gen_observation_from_f for (X, f, 1), where f is the
        output of objective_get_f_neg(X, objective)
    """
    return PBO.observation_model.gen_observation_from_f(
        X,
        PBO.objectives.objective_get_f_neg(X, objective),
        1,
    )

In [10]:
def train_and_visualize(X, y, title, lengthscale_init=None, signal_variance_init=None):
    """
    Fit the preference model on (X, y) and wrap the result in an SVGP model.

    :param X: query inputs, forwarded to train_model_fullcov
    :param y: observations, forwarded to train_model_fullcov
    :param title: label for this fit; currently unused (nothing is plotted here)
    :param lengthscale_init: optional initial lengthscale, forwarded to training
    :param signal_variance_init: optional initial signal variance, forwarded to training
    :return: tuple (model, inputs, u_mean, inducing_vars); u_mean and inducing_vars
        are the .numpy() values of the trained 'q_mu' and 'u' entries
    """
    # Train model with data
    fit = PBO.models.learning_fullgp.train_model_fullcov(
        X, y,
        obj_low=objective_low,
        obj_high=objective_high,
        lengthscale_init=lengthscale_init,
        signal_variance_init=signal_variance_init,
        indifference_threshold=0.,
        n_sample=1000,
        deterministic=True,  # only sample f values once, not re-sampling
        num_steps=3000,
    )

    variational_mean = fit['q_mu']
    inducing_points = fit['u']

    # Rebuild an SVGP model from the trained variational parameters and kernel.
    model = PBO.models.learning.init_SVGP_fullcov(
        variational_mean,
        fit['q_sqrt'],
        inducing_points,
        fit['kernel'],
        gpflow.likelihoods.Gaussian(),
    )

    return model, fit['inputs'], variational_mean.numpy(), inducing_points.numpy()

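Generate the rank dictionary, which maps each sushi's feature vector (as bytes) to its 1-based position in `fval_idx_tuples`. Immediate regret is later computed from it as `rank - 1`.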

In [11]:
# Load the (value, index) tuples used to build the ranking; the context manager
# closes the file handle as soon as unpickling finishes.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open("fval_idx_tuples.p", "rb") as tuples_file:
    fval_idx_tuples = pickle.load(tuples_file)

In [12]:
# Map the bytes of each feature row to its 1-based position in fval_idx_tuples.
# Element [1] of every tuple is used as the row index into `features`.
rank_dict = {
    features[entry[1]].data.tobytes(): position
    for position, entry in enumerate(fval_idx_tuples, start=1)
}

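The function below finds the sushi with the highest predicted value under the current model and looks up its rank in `rank_dict`. This rank is our main metric for the performance of the acquisition function.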

In [13]:
def get_max_sushi(model, features, rank_dict):
    """
    Find the sushi the model currently predicts to be best and look up its rank.

    :param model: gpflow model
    :param features: sushi features
    :param rank_dict: dictionary from the bytes of a sushi's feature row to its place in ranking
    :return: tuple (index of max sushi, rank)
    """
    # predict_f returns a sequence; element 0 is used as the predictions here.
    predictions = model.predict_f(features)[0]
    best = np.argmax(predictions)
    best_key = features[best].data.tobytes()

    return (best, rank_dict[best_key])

Store the results in these arrays:

In [14]:
# Each run ends with the initial preferences plus one new query per evaluation.
num_data_at_end = int(num_init_prefs + num_evals)

# Per-run storage, filled in at the end of each run / evaluation.
X_results = np.zeros((num_runs, num_data_at_end, num_choices, input_dims))
y_results = np.zeros((num_runs, num_data_at_end, 1, input_dims))
immediate_regret = np.zeros((num_runs, num_evals), dtype=np.int32)

Create the initial values for each run:

In [15]:
# Fixed seed so that the initial queries are the same on every execution.
np.random.seed(0)

# Draw row indices into `features` (with replacement), one per
# (run, initial preference, choice), then gather the corresponding rows.
random_indices = np.random.choice(
    features.shape[0],
    size=(num_runs, num_init_prefs, num_choices),
)
init_vals = np.take(features, random_indices, axis=0)

The following loops carry out the Bayesian optimization algorithm over a number of runs, with a fixed number of evaluations per run.

In [16]:
# Main Bayesian-optimization loop: for every run, start from the initial
# preferences, then repeatedly pick the query with the highest acquisition
# value, observe it, retrain the model and record the rank of the model's
# current best sushi.
for run in range(num_runs):  # CHECK IF STARTING RUN IS CORRECT
    print("Beginning run %s" % (run))

    X = init_vals[run]
    y = get_noisy_observation(X, objective)

    model, inputs, u_mean, inducing_vars = train_and_visualize(X, y, "Run_{}:_Initial_model".format(run))

    for evaluation in range(num_evals):
        print("Beginning evaluation %s" % (evaluation))

        success = False
        fail_count = 0
        while not success:
            # TODO: THIS ONLY WORKS FOR TOP-1 OF 2, CHANGE TO APPROPRIATE QUERY SAMPLING FOR HIGHER NUMBER OF CHOICES
            samples = PBO.models.learning_fullgp.construct_input_pairs(inputs, features)

            # Sample maximizers
            print("Evaluation %s: Sampling maximizers" % (evaluation))
            maximizers = PBO.fourier_features.sample_maximizers(X=inducing_vars,
                                                                count=num_maximizers,
                                                                n_init=num_maximizers_init,
                                                                D=num_fourier_features,
                                                                model=model,
                                                                min_val=objective_low,
                                                                max_val=objective_high)
            print(maximizers)

            # Calculate PES value I for each possible next query
            print("Evaluation %s: Calculating I" % (evaluation))
            I_vals = PBO.acquisitions.pes.I_batch(samples, maximizers, model)

            # Select query that maximizes I
            next_idx = np.argmax(I_vals)
            next_query = samples[next_idx]
            print("Evaluation %s: Next query is %s with I value of %s" % (evaluation, next_query, I_vals[next_idx]))

            X_temp = np.concatenate([X, [next_query]])
            # Evaluate objective function
            y_temp = np.concatenate([y, get_noisy_observation(np.expand_dims(next_query, axis=0), objective)], axis=0)

            try:
                print("Evaluation %s: Training model" % (evaluation))
                model, inputs, u_mean, inducing_vars = train_and_visualize(X_temp, y_temp,
                                                                           "Run_{}_Evaluation_{}".format(run, evaluation))
                success = True

            except ValueError as err:
                print(err)
                print("Retrying sampling random inputs")
                fail_count += 1

                # fail_count only changes here, so checking the limit inside the
                # handler is equivalent to checking after it, and lets the final
                # error be chained to the training failure that caused it.
                if fail_count >= 10:
                    print("Retry limit exceeded")
                    raise ValueError("Failed") from err

        # Commit the new data only after the model was retrained successfully.
        X = X_temp
        y = y_temp

        # Save model; the context manager flushes and closes the file even if
        # pickling fails, instead of leaking one handle per evaluation.
        checkpoint_path = results_dir + "Model_Run_{}_Evaluation_{}.p".format(run, evaluation)
        with open(checkpoint_path, "wb") as checkpoint_file:
            pickle.dump((X, y, inputs,
                         model.kernel.variance,
                         model.kernel.lengthscale,
                         model.likelihood.variance,
                         inducing_vars,
                         model.q_mu,
                         model.q_sqrt,
                         maximizers),
                        checkpoint_file)

        (max_idx, rank) = get_max_sushi(model, features, rank_dict)
        # Rank is 1-based, so the best sushi gives an immediate regret of 0.
        immediate_regret[run, evaluation] = rank - 1

        print("Maximizing sushi has index {} and rank {}".format(max_idx, rank))

    X_results[run] = X
    y_results[run] = y

Beginning run 0
Optimizer config:  {'name': 'RMSprop', 'learning_rate': 0.001, 'decay': 0.0, 'rho': 0.0, 'momentum': 0.0, 'epsilon': 1e-07, 'centered': False}
Indifference_threshold is fixed at 0.0
Initialize lengthscale at [0.5 0.5 0.5 0.5 0.5 0.5]
       signal variance at 1.0
   Initial negative ELBO: 86.35006495122589
Negative ELBO at step 0: 85.64465908077763 in 0.4060s
Negative ELBO at step 500: 10.381171853269084 in 25.4133s
Negative ELBO at step 1000: 9.90423326902899 in 25.5013s
Negative ELBO at step 1500: 9.688349761688485 in 25.6424s
Negative ELBO at step 2000: 9.508869428627689 in 25.8005s
Negative ELBO at step 2500: 9.376282041933706 in 25.1005s
Beginning evaluation 0
Evaluation 0: Sampling maximizers
Loss at step 0: 0.0718984941055924
Loss at step 500: -2.147649261958214
Loss at step 1000: -2.5722252429093726
Loss at step 1500: -2.604344794172573
Loss at step 2000: -2.611768747452007
Loss at step 2500: -2.6154376126968955
Loss at step 2951: -2.61655844512157
test.shape = 

Negative ELBO at step 500: 12.54511035951067 in 33.2432s
Negative ELBO at step 1000: 11.779717188015557 in 33.6251s
Negative ELBO at step 1500: 11.482580735919782 in 33.5466s
Negative ELBO at step 2000: 11.239790832249707 in 33.6183s
Negative ELBO at step 2500: 11.074240220542256 in 33.1667s
Maximizing sushi has index 8 and rank 2
Beginning evaluation 3
Evaluation 3: Sampling maximizers
Loss at step 0: 0.05764271416797862
Loss at step 500: -2.2654512999377587
Loss at step 1000: -2.685642934821562
Loss at step 1500: -2.725329884616824
Loss at step 2000: -2.7310845174126595
Loss at step 2500: -2.732685677220019
Loss at step 2588: -2.7327142853776327
test.shape =  (20, 50, 1)
tf.Tensor(
[[1.         0.03710688 0.         0.65648182 0.46906372 0.        ]
 [1.         0.06586694 1.         0.85524702 0.41244184 0.        ]
 [0.82901566 0.         0.01849154 0.73618222 1.         0.85452875]
 [0.29201512 0.         0.6389197  0.46372098 0.90107676 0.44692015]
 [1.         0.         0.20049

Negative ELBO at step 500: 15.096759737105383 in 42.0030s
Negative ELBO at step 1000: 13.629531549800621 in 41.5280s
Negative ELBO at step 1500: 13.411255934596976 in 41.7937s
Negative ELBO at step 2000: 13.25853884505249 in 41.6678s
Negative ELBO at step 2500: 13.144825322890846 in 41.2123s
Maximizing sushi has index 47 and rank 5
Beginning evaluation 6
Evaluation 6: Sampling maximizers
Loss at step 0: -0.0156717466956185
Loss at step 500: -2.432648700070239
Loss at step 1000: -2.937366665577561
Loss at step 1500: -2.991309481446824
Loss at step 2000: -3.018127198809487
Loss at step 2500: -3.0283314517848767
Loss at step 2897: -3.0299183050627545
test.shape =  (20, 50, 1)
tf.Tensor(
[[0.87333184 0.         0.74461599 0.6735759  0.76666382 0.02596788]
 [1.         0.         0.23814728 1.         0.84277643 0.33276418]
 [1.         0.         0.68025813 0.72160766 0.9320894  0.19762026]
 [0.9545649  0.01454988 0.68690295 0.82026926 0.92708938 0.2927525 ]
 [0.81829394 0.         1.     

Negative ELBO at step 1000: 16.265180315375503 in 49.6037s
Negative ELBO at step 1500: 16.083719775338146 in 50.0646s
Negative ELBO at step 2000: 15.951101491566753 in 50.0524s
Negative ELBO at step 2500: 15.894065195576538 in 49.6256s
Maximizing sushi has index 47 and rank 5
Beginning evaluation 9
Evaluation 9: Sampling maximizers
Loss at step 0: -0.14360378424698408
Loss at step 500: -1.5111309273784772
Loss at step 1000: -1.6193868384638055
Loss at step 1500: -1.628877563836081
Loss at step 1664: -1.6299534470264412
test.shape =  (20, 50, 1)
tf.Tensor(
[[0.88303109 0.         0.5722298  0.50514277 0.22003669 0.32067058]
 [0.90302878 0.         0.1569447  0.65022923 0.65594532 0.76989403]
 [1.         0.         0.82167554 0.41021458 0.81070649 0.16958069]
 [1.         0.08034465 0.51409976 0.73950111 0.40184818 0.86306538]
 [0.18458675 0.64264226 0.45789921 0.26880842 0.61712122 0.1424156 ]
 [0.98014469 0.         0.23163358 0.62937673 0.662015   0.88943921]
 [0.90614296 0.         

Maximizing sushi has index 36 and rank 32
Beginning evaluation 12
Evaluation 12: Sampling maximizers
Loss at step 0: -0.10081300750740771
Loss at step 500: -1.313149490964629
Loss at step 1000: -1.3992920981922554
Loss at step 1379: -1.4063263274439832
test.shape =  (20, 50, 1)
tf.Tensor(
[[1.         0.         0.66444376 0.84169208 0.28022257 1.        ]
 [0.93482023 0.         0.44541964 0.74380307 0.24668274 0.90323174]
 [0.98660048 0.04417628 0.36466394 0.76183269 0.29901626 1.        ]
 [1.         0.15757442 0.59049515 0.63181575 0.30077853 0.54011874]
 [0.91483375 0.56890467 0.75166846 0.56482927 0.75488146 0.6037753 ]
 [0.88960745 0.13886349 0.70718272 0.75208146 0.28406593 0.75118795]
 [1.         0.         0.23735672 0.61175189 0.28152411 0.47022835]
 [0.         0.         0.26310305 0.96498089 0.39484466 0.04756668]
 [0.91632888 0.43895751 0.62266464 0.42683958 0.61468818 0.33597519]
 [1.         0.         0.41471912 0.65747588 0.27816571 0.98654636]
 [0.92643423 0.14208

Loss at step 1000: -1.515332280153909
Loss at step 1500: -1.532540913653196
Loss at step 1938: -1.5357725935586308
test.shape =  (20, 50, 1)
tf.Tensor(
[[1.         0.         0.63144329 0.60735213 0.28057202 0.25927357]
 [0.99876793 0.         0.12930729 0.53189456 1.         0.68758411]
 [0.90643153 0.         0.68767828 0.51736509 0.28241337 0.34588147]
 [0.98596089 0.         0.57731262 0.30157957 0.28873775 0.        ]
 [0.9988143  0.         0.69215288 0.54708118 0.29007055 0.45555928]
 [0.9440405  0.03749593 0.50616884 0.36900077 0.29087666 0.        ]
 [0.9770203  0.         0.58356863 0.9358839  0.29965752 1.        ]
 [1.         0.         0.48957042 0.63439613 0.29474999 0.3700621 ]
 [0.9040811  0.         0.53915974 0.54850177 0.30707011 0.39690942]
 [1.         0.06586434 0.64197855 0.31737147 0.29096901 0.        ]
 [0.96956859 0.         0.71769501 0.32563042 0.28172076 0.        ]
 [1.         0.         0.65419578 0.34278775 0.61298869 0.25769781]
 [1.         0.     

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/seb/anaconda3/envs/gpflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-16-20bf0a142c0b>", line 26, in <module>
    max_val=objective_high)
  File "/home/seb/PBO/fourier_features.py", line 150, in sample_maximizers
    optimizer.minimize(loss, var_list=[x_star])
  File "/home/seb/anaconda3/envs/gpflow/lib/python3.6/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py", line 316, in minimize
    loss, var_list=var_list, grad_loss=grad_loss)
  File "/home/seb/anaconda3/envs/gpflow/lib/python3.6/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py", line 350, in _compute_gradients
    loss_value = loss()
  File "/home/seb/PBO/fourier_features.py", line 146, in <lambda>
    loss = lambda: construct_maximizer_objective(x_star)
  File "/home/seb/PBO/fourier_features.py", line 134, in constru

KeyboardInterrupt: 

In [None]:
# Persist the collected results; the context manager guarantees the file is
# flushed and closed once the dump completes.
with open(results_dir + "res.p", "wb") as results_file:
    pickle.dump((X_results, y_results, immediate_regret), results_file)