# Preferential Bayesian Optimization: Dueling-Thompson Sampling

Implementation of the Dueling-Thompson sampling algorithm for preferential Bayesian optimization by González et al. (2017).

In [None]:
import numpy as np
import gpflow
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
import sys
import os
import datetime
import pickle

from gpflow.utilities import set_trainable, print_summary
gpflow.config.set_default_summary_fmt("notebook")

sys.path.append(os.path.split(os.path.split(os.path.split(os.getcwd())[0])[0])[0]) # Move 3 levels up directory to import PBO
import PBO

In [None]:
# Report how many physical GPUs TensorFlow can see before any device configuration is done.
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

In [None]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to a single GPU. Index 3 is the device this experiment was
    # set up for; on machines with fewer GPUs fall back to the first one instead of
    # failing with an IndexError (which the RuntimeError handler below would not catch).
    preferred_gpu_index = 3
    try:
        # Allocate GPU memory on demand instead of reserving all of it up front.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        visible_gpu = gpus[preferred_gpu_index] if preferred_gpu_index < len(gpus) else gpus[0]
        tf.config.experimental.set_visible_devices(visible_gpu, 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as e:
        # Visible devices must be set before GPUs have been initialized
        print(e)

In [None]:
# Initial lengthscale passed to train_and_visualize for every RBF kernel component.
lengthscale = 5.
# Concentration (alpha) and rate (beta) of the Gamma prior that train_and_visualize
# places on each kernel lengthscale.
lengthscale_prior_alpha = tf.constant(2, dtype=tf.float64)
lengthscale_prior_beta = tf.constant(0.4, dtype=tf.float64)

In [None]:
def log(message):
    """Print ``message`` to stdout prefixed with the current local timestamp.

    :param message: str to print after the ``<timestamp>: `` prefix
    """
    timestamp = str(datetime.datetime.now())
    print(': '.join((timestamp, message)))

In [None]:
# Load CIFAR-10 through Keras. In the cells shown here only ytrain is used, to attach a
# class label to each precomputed embedding.
(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.cifar10.load_data()

In [None]:
# Load the precomputed CIFAR-10 embeddings. The file is opened in a context manager so
# the handle is closed as soon as unpickling finishes.
# NOTE: pickle can execute arbitrary code while loading; only load embedding files you trust.
with open("cifar_embedding.p", "rb") as embedding_file:
    cifar_embedding = pickle.load(embedding_file)

In [None]:
# Lookup table from the raw bytes of an embedding vector to its CIFAR-10 class label;
# filled in by the next cell and read by get_class and the objective.
embedding_to_class = {}

In [None]:
# Key every embedding by its raw bytes (arrays are not hashable) and store the label of
# the training image at the same position.
for index, embedding in enumerate(cifar_embedding):
    embedding_to_class[embedding.data.tobytes()] = ytrain[index][0]

In [None]:
def objective(x):
    """Evaluate the CIFAR objective from PBO on ``x`` using the embedding-to-class table."""
    return PBO.objectives.cifar(x, embedding_to_class)


# Smallest and largest coordinate value found anywhere in the embedding array.
objective_low = np.min(cifar_embedding)
objective_high = np.max(cifar_embedding)
objective_name = "CIFAR"
acquisition_name = "DTS"
# Used to name the results directory for this experiment.
experiment_name = "_".join(["PBO", acquisition_name, objective_name])

In [None]:
def get_class(x):
    """
    Look up the CIFAR-10 class of every embedding in x via embedding_to_class.

    :param x: tensor of shape (..., 2). CIFAR-10 embeddings
    :return: int8 array of shape (..., 1). last dim is the class label (0-9) of the embedding
    """
    # Flatten all leading dimensions so each row is one 2-d embedding.
    flat = np.reshape(x, [-1, 2])
    num_points = flat.shape[0]
    flat_classes = np.zeros((num_points, 1), dtype=np.int8)

    # The lookup table is keyed by the raw bytes of each embedding vector.
    for row_idx, embedding in enumerate(flat):
        flat_classes[row_idx] = embedding_to_class[embedding.data.tobytes()]

    # Restore the leading dimensions, with a trailing axis of size 1 for the label.
    leading_shape = x.shape[:-1]
    return np.reshape(flat_classes, leading_shape + (1,))

In [None]:
# Number of independent repetitions of the experiment (outer loop of the main cell).
num_runs = 20
# Number of duels queried per run after the initial ones (inner loop of the main cell).
num_evals = 20
# NOTE(review): not referenced in the cells shown here — confirm whether it is still needed.
num_samples = 100
# Number of points compared in one query (a duel compares 2).
num_choices = 2
# Dimensionality of a single embedding point.
input_dims = 2
# NOTE(review): not referenced in the cells shown here — confirm whether it is still needed.
num_maximizers = 20
# Number of random embeddings per run from which the initial pairwise duels are built.
num_init_points = 3
# NOTE(review): not referenced in the cells shown here — confirm whether it is still needed.
num_inducing_init = 3
num_discrete_per_dim = 40 # Discretization of continuous input space; its square is the size of the per-evaluation embedding subset

In [None]:
# All figures and pickled models of this experiment are written below this directory.
results_dir = ''.join([os.getcwd(), '/results/', experiment_name, '/'])

try:
    # Create target directory, including any missing parents
    os.makedirs(results_dir)
except FileExistsError:
    print("Directory " , results_dir ,  " already exists")
else:
    print("Directory " , results_dir ,  " created ") 

In [None]:
def visualize_model(query_points, y, m, title="Model", cmap="Spectral"):
    """
    Plot the model's predictive mean and the variance of the logistic of f over a grid of
    duels (x, x') on [0, 1] x [0, 1], overlay the observed duels, save the figure under
    results_dir and show it.

    Only duels between 1-d points can be drawn, so the function returns without plotting
    when query_points does not have exactly 2 columns.

    :param query_points: array of shape (num_data, 2), each row a duel (x, x')
    :param y: array of shape (num_data, 1); truthy entries are drawn as "o", falsy as "x"
    :param m: trained preference model exposing predict_y
    :param title: figure title, also used as the file name of the saved PNG
    :param cmap: matplotlib colormap name used for both heatmaps
    """
    if query_points.shape[-1] != 2:
        return

    # Split the observed duels by outcome so they can be drawn with different markers.
    pos_vals = np.array([point for point, label in zip(query_points, y) if label])
    neg_vals = np.array([point for point, label in zip(query_points, y) if not label])

    # Build every duel (x, x') on the grid locally; previously `combs` was never defined
    # inside this function. With indexing='ij' the first coordinate varies slowest, so
    # after the reshape-and-transpose below x lies on the horizontal axis and x' on the
    # vertical axis, matching the axis labels.
    num_discrete_points = num_discrete_per_dim
    side = np.linspace(0, 1, num_discrete_points)
    first, second = np.meshgrid(side, side, indexing='ij')
    combs = np.stack([first.ravel(), second.ravel()], axis=1)

    preds = tf.transpose(tf.reshape(m.predict_y(combs)[0], [num_discrete_points, num_discrete_points]))
    variances = tf.transpose(tf.reshape(PBO.acquisitions.dts.variance_logistic_f(m, combs), [num_discrete_points, num_discrete_points]))

    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.suptitle(title)
    fig.set_size_inches(18.5, 6.88)
    fig.set_dpi(200)

    # Left panel: predictive mean.
    ax1.axis('equal')
    if len(pos_vals) != 0:
        ax1.scatter(*pos_vals.T, c="black", marker="o")
    if len(neg_vals) != 0:
        ax1.scatter(*neg_vals.T, c="black", marker="x")
    im1 = ax1.imshow(preds, interpolation='nearest', extent=(0.0, 1.0, 0.0, 1.0), origin='lower', cmap=cmap)
    ax1.set_title("Mean of y(x, x')")
    ax1.set_xlabel("x")
    ax1.set_ylabel("x'")
    # NOTE(review): 0.757 is presumably the location of the objective's optimum — confirm.
    ax1.axvline(x=0.757, linestyle='--')
    fig.colorbar(im1, ax=ax1)

    # Right panel: variance of the logistic of f.
    ax2.axis('equal')
    if len(pos_vals) != 0:
        ax2.scatter(*pos_vals.T, c="black", marker="o")
    if len(neg_vals) != 0:
        ax2.scatter(*neg_vals.T, c="black", marker="x")
    im2 = ax2.imshow(variances, interpolation='nearest', extent=(0.0, 1.0, 0.0, 1.0), origin='lower', cmap=cmap)
    ax2.set_title("Variance of y(x, x')")
    ax2.set_xlabel("x")
    ax2.set_ylabel("x'")
    fig.colorbar(im2, ax=ax2)

    plt.savefig(fname=results_dir + title + ".png")

    plt.show()

In [None]:
def visualize_f_sample(f_vals, cmap="Spectral"):
    """
    Draw a heatmap of sampled f values over the grid of duels (x, x') on [0, 1] x [0, 1].

    :param f_vals: tensor with num_discrete_per_dim ** 2 entries, one per grid duel
    :param cmap: matplotlib colormap name
    """
    # The grid side comes from the module-level num_discrete_per_dim; the previously
    # referenced name `num_discrete_points` is not defined at module level.
    grid_side = num_discrete_per_dim

    fig, (ax1) = plt.subplots(1)
    fig.suptitle('Sampled f values')
    fig.set_size_inches(4, 3.3)
    fig.set_dpi(100)

    ax1.axis('equal')
    im1 = ax1.imshow(tf.transpose(tf.reshape(f_vals, [grid_side, grid_side])),
                     interpolation='nearest', extent=(0.0, 1.0, 0.0, 1.0), origin='lower', cmap=cmap)
    ax1.set_xlabel("x")
    ax1.set_ylabel("x'")
    # NOTE(review): 0.757 is presumably the location of the objective's optimum — confirm.
    ax1.axvline(x=0.757, linestyle='--')
    fig.colorbar(im1, ax=ax1)

In [None]:
def std_representation(X, num_choices):
    """
    Split each concatenated query row into its individual choices.

    :param X: tensor of shape (num_data, input_dims * num_choices)
    :param num_choices: int, number of points concatenated in every row
    :return: float array of shape (num_data, num_choices, input_dims)
    """
    dims_per_choice = X.shape[-1] // num_choices
    unpacked = np.zeros((X.shape[0], num_choices, dims_per_choice))

    # Consecutive column groups of width dims_per_choice are exactly the rows of a
    # row-major reshape; any surplus trailing columns are ignored.
    used_columns = X[:, :num_choices * dims_per_choice]
    unpacked[:] = np.reshape(used_columns, unpacked.shape)

    return unpacked

In [None]:
def get_noisy_observation_dts(X, objective):
    """
    Run every duel in X through the observation model and report whether its first
    point was the one chosen.

    :param X: tensor of shape (num_data, input_dims * 2)
    :param objective: objective function
    :return: int8 array of shape (num_data, 1); 1 where the first point of the duel
        matches the observed choice, 0 otherwise
    """
    X_std = std_representation(X, num_choices)  # (num_data, num_choices, input_dims)
    f = PBO.objectives.objective_get_f_neg(X_std, objective)
    obs = np.array(PBO.observation_model.gen_observation_from_f(X_std, f, 1))  # (num_data, 1, input_dims)

    num_data = X.shape[0]
    first_point_chosen = [np.allclose(X_std[row, 0], obs[row, 0]) for row in range(num_data)]

    outcomes = np.zeros((num_data, 1), dtype=np.int8)
    outcomes[:, 0] = np.array(first_point_chosen, dtype=np.int8)
    return outcomes

In [None]:
def train_and_visualize(X, y, lengthscale, title):
    """
    Fit a variational GP preference classifier to the observed duels and plot it.

    :param X: array of duels, one row per duel (first point followed by second point)
    :param y: array of binary duel outcomes with one row per row of X
    :param lengthscale: initial lengthscale given to every RBF kernel component
    :param title: title passed on to visualize_model
    :return: the trained gpflow VGP model
    """
    # Gamma prior shared by all kernel lengthscales.
    lengthscale_prior = tfp.distributions.Gamma(concentration=lengthscale_prior_alpha,
                                               rate=lengthscale_prior_beta)
    # Product of RBF kernels; the i-th component acts on columns i and i + input_dims,
    # i.e. the i-th coordinate of the first and of the second point of a duel.
    kernel = gpflow.kernels.Product([gpflow.kernels.RBF(lengthscale=lengthscale, 
                                                        active_dims=[i, i+input_dims]) 
                                     for i in range(input_dims)])
    for k in kernel.kernels:
        # Keep each lengthscale positive and bounded below by the default jitter.
        k.lengthscale.transform = gpflow.utilities.bijectors.positive(lower=gpflow.default_jitter())
        k.lengthscale.prior = lengthscale_prior
    
    # Bernoulli likelihood: each duel outcome is a binary label.
    m = gpflow.models.VGP(data=(X, y),
                      kernel=kernel,
                      likelihood=gpflow.likelihoods.Bernoulli())
    optimizer = gpflow.optimizers.Scipy()
    # NOTE(review): the objective is -m.log_likelihood(); whether the lengthscale prior
    # set above contributes to it depends on the installed GPflow version — confirm.
    optimizer.minimize(lambda: -m.log_likelihood(), m.trainable_variables)
    visualize_model(X, y, m, title=title)
    return m

In [None]:
def uniform_grid(input_dims, num_discrete_per_dim, low, high):
    """
    Enumerate every point of a regular grid with num_discrete_per_dim evenly spaced
    values between low and high (inclusive) along each of input_dims dimensions.
    The first dimension varies fastest across consecutive rows.

    :param input_dims: int, number of dimensions
    :param num_discrete_per_dim: int, number of grid values per dimension
    :param low: smallest grid value
    :param high: largest grid value
    :return: tensor of shape (num_discrete_per_dim ** input_dims, input_dims)
    """
    total_points = num_discrete_per_dim ** input_dims
    axis_values = np.linspace(low, high, num_discrete_per_dim)
    point_ids = np.arange(total_points)

    grid = np.zeros([total_points, input_dims])
    for axis in range(input_dims):
        # Read the point id as a base-num_discrete_per_dim number; its digit for this
        # axis selects the grid value.
        stride = num_discrete_per_dim ** axis
        grid[:, axis] = axis_values[(point_ids // stride) % num_discrete_per_dim]
    return grid

In [None]:
def best_guess(m, discrete_space, combs):
    """
    Return the soft-Copeland maximizer of the model's posterior mean over the given duels.

    :param m: trained preference model exposing predict_f
    :param discrete_space: candidate points handed to the maximizer
    :param combs: duels at which the posterior mean of f is evaluated
    """
    posterior_mean = m.predict_f(combs)[0]
    return PBO.acquisitions.dts.soft_copeland_maximizer(posterior_mean, discrete_space)

In [None]:
def pref_inversions(model):
    """
    Method to evaluate models over discrete preference rankings. Given an objective preference ranking over classes,
    we calculate the average mean the model assigns to each class, sort the classes according to this average mean,
    then calculate the number of inversions required to reach the desired objective preference ranking. 0 inversions
    means the model has learned the preference ranking perfectly. The more inversions, the further away the model is.

    :param model: trained model exposing predict_f
    :return: int, number of inversions between the model's class ranking and the objective ranking

    NOTE(review): predict_f is called on the raw embeddings, while train_and_visualize fits
    the model on concatenated duels — confirm the model accepts single points here.
    """
    def count_inversions(input_list):
        """Count the adjacent swaps bubble sort needs to sort the list ascending."""
        lst = input_list.copy()
        num_inversions = 0
        changed = True
        while changed:
            changed = False
            for i in range(len(lst) - 1):
                if lst[i] > lst[i+1]:
                    lst[i], lst[i+1] = lst[i+1], lst[i]
                    num_inversions += 1
                    changed = True

        return num_inversions

    class_to_posval = {0: -0.1,
                       1: -0.2,
                       8: -0.3,
                       9: -0.4,
                       2: -0.5,
                       3: -0.6,
                       4: -0.7,
                       5: -0.8,
                       6: -0.9,
                       7: -1.}  # higher is more preferred here

    fvals = model.predict_f(cifar_embedding)[0]
    # get_class returns int8 labels, which tf.scatter_nd does not accept as indices;
    # cast to int32 so they also match the dtype of the `shape` argument.
    indices = get_class(cifar_embedding).astype(np.int32)

    # Sum the predicted means per class and divide by the class size.
    # NOTE(review): 5000 assumes every class has exactly 5000 embeddings — confirm.
    average_f = tf.scatter_nd(indices=indices,
                              updates=np.squeeze(fvals),
                              shape=tf.constant([10])) / 5000

    # Rank classes from lowest to highest average mean. A stable sort on a plain array
    # breaks ties by class id, as sorting (value, class) tuples would.
    class_order = np.argsort(np.asarray(average_f), kind='stable')
    model_posvals = [class_to_posval[int(class_id)] for class_id in class_order]

    return count_inversions(model_posvals)

In [None]:
def flip(X):
    """
    Swap the two points of every duel.

    :param X: tensor of shape (num_data, input_dims * 2)
    :return: float array of shape (num_data, input_dims * 2) in which the first input_dims
        columns and the last input_dims columns have traded places
    """
    half = X.shape[-1] // 2
    swapped = np.zeros(X.shape)
    swapped[:, :half] = X[:, half:]
    swapped[:, half:] = X[:, :half]
    return swapped

In [None]:
def flip_y(y):
    """
    Invert binary duel outcomes (0 becomes 1 and 1 becomes 0).

    :param y: tensor of shape (num_data, 1), with int values either 0 or 1
    :return: tensor of the same shape with every outcome inverted
    """
    shifted = y + 1
    return shifted % 2

Create the initial values for each run:

In [None]:
# Fixed seed so every execution draws the same initial points.
np.random.seed(0)
random_indices = np.random.choice(cifar_embedding.shape[0], [num_runs, num_init_points], replace=False)
init_points = np.take(cifar_embedding, random_indices, axis=0)

# Every unordered pair of a run's initial points becomes one initial duel.
num_combs = int((num_init_points-1) * num_init_points / 2)
# Index pairs (a, b) with a < b, in the order (0, 1), (0, 2), ..., (1, 2), ...
first_idx, second_idx = np.triu_indices(num_init_points, k=1)

init_vals = np.zeros([num_runs, num_combs, num_choices, input_dims])
init_vals[:, :, 0] = init_points[:, first_idx]
init_vals[:, :, 1] = init_points[:, second_idx]

# Concatenate the two points of each duel into a single row.
init_vals = np.reshape(init_vals, [num_runs, num_combs, num_choices * input_dims])

Store the results in these arrays:

In [None]:
# Every duel is stored together with its flipped copy, hence the factor of 2.
num_data_at_end = (num_combs + num_evals) * 2
# Final duels and outcomes of each run.
X_results = np.zeros([num_runs, num_data_at_end, input_dims * num_choices])
y_results = np.zeros([num_runs, num_data_at_end, 1])
# Value returned by pref_inversions after each evaluation of each run.
best_guess_results = np.zeros([num_runs, num_evals])

In [None]:
# Main experiment: for every run, start from the initial duels and repeatedly
# (1) sample f, (2) pick the next duel, (3) observe its outcome, (4) refit the model.
for run in range(num_runs):
    log("Starting run {}".format(run))

    # Fit a GP with kernel k to Dn
    X = init_vals[run]
    y = get_noisy_observation_dts(X, objective)

    # Add the mirrored duel (x', x) with the inverted outcome for every observation.
    X = np.vstack([X, flip(X)])
    y = np.vstack([y, flip_y(y)])

    model = train_and_visualize(X, y, lengthscale=lengthscale, title="Run_{}_Initial_model".format(run))

    for evaluation in range(num_evals):
        log("Starting evaluation " + str(evaluation))

        # Get random subset of cifar_embedding to use per evaluation, as using the whole thing takes up too much space
        subset_indices = np.random.choice(cifar_embedding.shape[0], num_discrete_per_dim ** 2, replace=False)
        discrete_space = np.take(cifar_embedding, subset_indices, axis=0)
        combs = PBO.acquisitions.dts.combinations(discrete_space)

        is_valid_query = False
        num_tries = 0
        while not is_valid_query:
            # Sample f using RFF
            f_vals = PBO.acquisitions.dts.sample_f(model, X, combs)

            # 2 and 3. Compute the acquisition for duels alpha and get next duel
            log("Computing acquisition function")
            x_next = PBO.acquisitions.dts.soft_copeland_maximizer(f_vals, discrete_space)

            # Pair x_next with every candidate and take the opponent with the largest variance.
            all_pairs = np.concatenate([np.tile(x_next, (discrete_space.shape[0], 1)), discrete_space], axis=1)
            next_vars = np.squeeze(PBO.acquisitions.dts.variance_logistic_f(model, all_pairs),
                                   axis=1)
            xprime_next = discrete_space[np.argmax(next_vars)]

            x_xprime_next = np.expand_dims(np.concatenate([x_next, xprime_next]), axis=0)

            # If both x and x' are equal, or the query has already been made, will cause Fourier features matrix
            # to become non-invertible later on
            is_self_duel = np.all(np.equal(x_xprime_next, flip(x_xprime_next)))
            # Compare whole rows: `x_xprime_next in X` is true as soon as any single
            # coordinate matches, which wrongly rejects duels that were never asked.
            already_queried = np.any(np.all(np.equal(X, x_xprime_next), axis=1))
            if is_self_duel or already_queried:
                log("Invalid query, resampling f")
                num_tries += 1
                if num_tries >= 100:
                    raise ValueError("No valid, previously unseen duel found after 100 resampling attempts")
            else:
                log("x and x_prime: \n" + str(x_xprime_next))
                is_valid_query = True

        # 4. Run the duel and get y
        y_next = get_noisy_observation_dts(x_xprime_next, objective)
        log("y_next: \n" + str(y_next))

        # 5. Augment X and Y, and add symmetric points
        X = np.vstack([X, x_xprime_next, flip(x_xprime_next)])
        y = np.vstack([y, y_next, flip_y(y_next)])

        # Fit a GP with kernel k to Dj and learn pi(x).
        model = train_and_visualize(X, y, lengthscale=lengthscale, title="Run_{}_Evaluation_{}".format(run, evaluation))

        # Save model: data, kernel hyperparameters and variational parameters
        kernels_variance = []
        kernels_lengthscale = []
        for k in model.kernel.kernels:
            kernels_variance.append(k.variance.numpy())
            kernels_lengthscale.append(k.lengthscale.numpy())

        # The context manager closes the file after every dump instead of leaking one
        # handle per evaluation.
        with open(results_dir + "Model_Run_{}_Evaluation_{}.p".format(run, evaluation), "wb") as model_file:
            pickle.dump((X, y,
                         tuple(kernels_variance),
                         tuple(kernels_lengthscale),
                         model.q_mu.numpy(),
                         model.q_sqrt.numpy()),
                        model_file)

        # Get current best guess
        best_guess_results[run, evaluation] = pref_inversions(model)

    X_results[run] = X
    y_results[run] = y

In [None]:
# Persist the results of all runs; the context manager makes sure the file is flushed
# and closed once the dump finishes.
with open(results_dir + "Xybestguess.p", "wb") as results_file:
    pickle.dump((X_results, y_results, best_guess_results), results_file)

In [None]:
# One figure per run: number of inversions after each evaluation. The x axis numbers the
# evaluations starting right after the initial duels.
x_axis = list(range(num_combs+1, num_combs+1+num_evals))
for i, run_inversions in enumerate(best_guess_results):
    plt.figure(figsize=(12, 6))
    plt.plot(x_axis, run_inversions, 'kx', mew=2)
    plt.xticks(x_axis)
    plt.xlabel('Evaluations', fontsize=18)
    plt.ylabel('Inversions', fontsize=16)
    plt.title("Run %s" % i)
    plt.show()