The aim of this notebook is to test the GP sampling implementation (the `gp_sampling`
package) by using Thompson sampling (TS) to optimize the Alpine2 function.

## Setup

In [1]:
import torch
from torch import Tensor
from botorch.models import SingleTaskGP
from botorch.test_functions import SyntheticTestFunction
from botorch.acquisition import PosteriorMean
from botorch.optim import optimize_acqf
from gpytorch.mlls import ExactMarginalLogLikelihood
from gpytorch.utils.errors import NotPSDError
from botorch import fit_gpytorch_model
from gp_sampling.decoupled_samplers import decoupled_sampler
from gp_sampling.thompson_samplers import decoupled_ts, exact_ts
from typing import Optional, Union
import math
from botorch.models.transforms import Standardize
%matplotlib
import matplotlib.pyplot as plt
from time import time

Using matplotlib backend: TkAgg


In [2]:
class Alpine2(SyntheticTestFunction):
    """
    Alpine-2 synthetic test function on the unit hypercube.

    The function is usually stated for n inputs with x_i in [0, 10]; here the
    inputs live in [0, 1] and are multiplied by 10 before evaluation.

    Original domain:
        A(x) = - prod_{i=1}^n sqrt(x_i) sin(x_i)
    Unit-cube domain (what `evaluate_true` computes):
        A(x) = - prod_{i=1}^n sqrt(10 * x_i) sin(10 * x_i)

    The leading minus sign turns the problem into a minimization problem by
    default. The global optimum sits at x_i ≈ 7.91705268466621 on the original
    domain, i.e. x_i ≈ 0.7917052 after rescaling.
    """

    def __init__(
        self, dim=6, noise_std: Optional[float] = None, negate: bool = False
    ) -> None:
        # These attributes are assigned before delegating to the base-class
        # constructor, same as in the rest of this class' setup.
        self.dim = dim
        self._bounds = [(0.0, 1.0)] * self.dim
        # Single optimizer: the same (rescaled) coordinate in every dimension.
        self._optimizers = [(0.791705268466621,) * self.dim]
        # Per-dimension optimal factor raised to the number of dimensions.
        self._optimal_value = -math.pow(2.808130979537964, self.dim)
        super().__init__(noise_std=noise_std, negate=negate)

    def evaluate_true(self, X: Tensor) -> Tensor:
        # Map the unit-cube input back to the [0, 10] domain.
        scaled = 10 * X
        factors = torch.sqrt(scaled) * torch.sin(scaled)
        # Product over the last (feature) axis, kept as a trailing size-1 dim.
        return -torch.prod(factors, dim=-1, keepdim=True)

In [3]:
def run_one_replication(
    iterations: int,
    dim: int,
    num_draws: int = 2 ** 6,
    q: int = 1,
    num_basis: int = 2 ** 8,
    decoupled: bool = True,
    seed: Optional[int] = None,
    device: Optional[Union[torch.device, str]] = None
) -> Tensor:
    r"""
    Runs one replication of BO using TS to optimize Alpine2 function.

    Each iteration refits the GP on all data collected so far, records the
    current performance, draws one Thompson sample (decoupled or exact), evaluates
    the function there and appends the observation to the training data.

    Args:
        iterations: Number of iterations
        dim: Dimension of the problem
        num_draws: Number of samples to use for TS
        q: Number of parallel evaluations
        num_basis: Number of basis functions to use for decoupled sampler
        decoupled: If True, uses decoupled sampler. Otherwise, samples from exact GP.
        seed: The seed for random number generation
        device: Option to specify cpu / gpu. If not decoupled, defaults to GPU if
        available. For decoupled, defaults to CPU.

    Returns:
        An `iterations+1` tensor of output performance. Evaluated as the function value
            at the maximizer of PosteriorMean.
    """
    if device is None:
        if decoupled:
            device = torch.device("cpu")
        else:
            device = (
                torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
            )
    else:
        device = torch.device(device) if isinstance(device, str) else device
    if seed is not None:
        torch.manual_seed(seed)
    function = Alpine2(dim=dim)
    bounds = torch.tensor([[0.], [1.]], device=device).repeat(1, dim)
    # Initial design: 2*dim + 2 uniformly random points in the unit cube. The
    # second axis must be `dim` (not a hard-coded 2) so that it matches `bounds`
    # and the `d=dim` handed to the Thompson samplers.
    train_X = torch.rand(2*dim + 2, dim, device=device)
    train_Y = function(train_X).reshape(-1, 1)

    def current_best() -> Tensor:
        r"""
        Returns the current best solution value, evaluated as the true function value
        at the maximizer of PosteriorMean

        Returns:
            A tensor with the current best value
        """
        # `model` is looked up in the enclosing scope at call time, so this always
        # uses the most recently fitted GP.
        pm = PosteriorMean(model)
        current_best_point, _ = optimize_acqf(
            acq_function=pm,
            bounds=bounds,
            q=1,
            num_restarts=10*dim,
            raw_samples=200*dim
        )
        return function.evaluate_true(current_best_point).reshape(-1)

    def update_gp() -> SingleTaskGP:
        r"""
        Updates (refits) the GP model using the most recent data

        Returns:
            The fitted GP model
        """
        # `train_X` / `train_Y` are read from the enclosing scope at call time.
        gp = SingleTaskGP(train_X, train_Y, outcome_transform=Standardize(m=1))
        mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        fit_gpytorch_model(mll)
        return gp

    output = torch.empty(iterations+1, device=device)
    for i in range(iterations):
        iter_start = time()
        # fit the gp and get the current performance
        model = update_gp()
        output[i] = current_best()
        if decoupled:
            # draw the thompson sample using decoupled sampler
            ps = decoupled_sampler(model=model, sample_shape=[q], num_basis=num_basis)
            next_sample = decoupled_ts(ps, num_draws=num_draws, d=dim)
        else:
            # draw the thompson sample using exact posterior
            # NOTE(review): `q` is not forwarded on this path - confirm whether
            # exact_ts is meant to support q > 1.
            next_sample = exact_ts(model, num_draws=num_draws, d=dim)
        next_eval = function(next_sample).reshape(-1, 1)
        train_X = torch.cat([train_X, next_sample])
        train_Y = torch.cat([train_Y, next_eval])
        print("iter %d with decoupled %s took %s" % (i, decoupled, time()-iter_start))

    # add the final performance after all observations
    model = update_gp()
    output[-1] = current_best()
    return output

Run a comparison of exact TS and decoupled TS on the 2d Alpine2 problem.

In [4]:
# Experiment configuration.
replications = 2
iterations = 50
dim = 2
run_decoupled = True
run_exact = True

# One row per replication, one column per recorded performance value.
decoupled_out = torch.zeros(replications, iterations+1)
exact_out = torch.zeros(replications, iterations+1)
# Boolean mask: True while a replication has not hit a NotPSDError.
executed = torch.ones(replications, dtype=torch.bool)
for i in range(replications):
    try:
        if run_decoupled:
            decoupled_out[i] = run_one_replication(
                iterations, dim, decoupled=True, seed=i
            )
        if run_exact:
            exact_out[i] = run_one_replication(iterations, dim, decoupled=False, seed=i)
    except NotPSDError:
        # Exclude the whole replication (both methods) from the averages, but say
        # so instead of dropping it silently.
        executed[i] = False
        print("replication %d failed with NotPSDError and is excluded" % i)

if executed.any():
    fig, ax = plt.subplots()
    # Only draw curves for methods that were actually run; a disabled method
    # would otherwise show up as a misleading all-zero line.
    if run_decoupled:
        ax.plot(torch.mean(decoupled_out[executed], dim=0), label="decoupled TS")
    if run_exact:
        ax.plot(torch.mean(exact_out[executed], dim=0), label="exact TS")
    ax.set_xlabel("iteration")
    ax.set_ylabel("true function value at PosteriorMean maximizer")
    ax.set_title("Alpine2 (dim=%d), mean over %d replication(s)" % (dim, int(executed.sum())))
    ax.grid(True)
    if run_decoupled or run_exact:
        ax.legend()
    plt.show()
else:
    # Averaging over an empty selection would only produce NaNs.
    print("no replication completed; nothing to plot")

  return torch._C._cuda_getDeviceCount() > 0


iter 0 with decoupled False took 0.2189338207244873
iter 1 with decoupled False took 0.46135568618774414
iter 2 with decoupled False took 0.19448161125183105
iter 3 with decoupled False took 0.7627942562103271
iter 4 with decoupled False took 0.9630508422851562
iter 5 with decoupled False took 0.8840408325195312
iter 6 with decoupled False took 0.4963827133178711
iter 7 with decoupled False took 1.7268507480621338
iter 8 with decoupled False took 1.980445384979248
iter 9 with decoupled False took 1.084465742111206
iter 10 with decoupled False took 1.3961007595062256
iter 11 with decoupled False took 1.497725248336792
iter 12 with decoupled False took 2.851093292236328
iter 13 with decoupled False took 4.799160957336426
iter 14 with decoupled False took 2.331326723098755
iter 15 with decoupled False took 2.8272359371185303
iter 16 with decoupled False took 4.882258176803589
iter 17 with decoupled False took 7.02118182182312
iter 18 with decoupled False took 4.3998939990997314
iter 19 wi

In [5]:
# Boolean mask over replications: True where the run finished without a NotPSDError.
executed

tensor([True, True])