In [1]:
import torch
import botorch
from botorch.fit import fit_gpytorch_model
from botorch.models import SingleTaskGP
from botorch.test_functions import Hartmann
from gpytorch.mlls import ExactMarginalLogLikelihood

from botorch.utils.transforms import (
    concatenate_pending_points,
    match_batch_shape,
    t_batch_mode_transform,
)

neg_hartmann6 = Hartmann(dim=6, negate=True)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
train_x = torch.zeros(1,6)
theta_i = train_x[0]
train_obj = neg_hartmann6(train_x).unsqueeze(-1)
model = SingleTaskGP(train_X=train_x, train_Y=train_obj)
model.D = 6
mll = ExactMarginalLogLikelihood(model.likelihood, model)
fit_gpytorch_model(mll);



In [3]:

class GradientInformation(botorch.acquisition.AnalyticAcquisitionFunction):
    '''Acquisition function to sample points for gradient information.

    Attributes:
        model: Gaussian process model that supplies the Jacobian (e.g. DerivativeExactGPSEModel).
    '''

    def __init__(self, model):
        '''Inits acquisition function with model.'''
        super().__init__(model)
        self.call_count = 0
        

    def update_theta_i(self, theta_i):
        '''Updates the current parameters.

        This leads to an update of K_xX_dx.

        Args:
            theta_i: New parameters.
        '''
        if not torch.is_tensor(theta_i):
            theta_i = torch.tensor(theta_i)
        self.theta_i = theta_i
        self.update_K_xX_dx()
    
    def K_xX(self,theta_t,X_hat):
            
        rslt = self.model.covar_module(theta_t,X_hat).evaluate()
        
        return rslt

    def update_K_xX_dx(self):
        
        '''When new x is given update K_xX_dx.'''
        # Pre-compute large part of K_xX_dx.
        X = self.model.train_inputs[0]
        x = self.theta_i.view(-1, self.model.D)
        self.K_xX_dx_part = self._get_KθX_dθ(x, X)

  

    def _get_KθX_dθ(self, theta_t, X_hat) :
        '''Computes the analytic derivative of the kernel K(x,X) w.r.t. x.

        Args:
            x: (n x D) Test points.

        Returns:
            (n x D) The derivative of K(x,X) w.r.t. x.
        '''
        
        jacobs = torch.autograd.functional.jacobian(func=lambda theta : self.K_xX(theta,X_hat),inputs=(theta_t))
        KθX_dθ = jacobs.sum(dim=2).transpose(1,2)

        return KθX_dθ

    # TODO: nicer batch-update for batch of thetas.
    #@botorch.utils.transforms.t_batch_mode_transform(expected_q=1)
    #@concatenate_pending_points
    @t_batch_mode_transform()
    def forward(self, thetas) :
        
        '''Evaluate the acquisition function on the candidate set thetas.

        Args:
            thetas: A (q) x D-dim Tensor of (q) batches with a d-dim theta points each.

        Returns:
            A (q)-dim Tensor of acquisition function values at the given theta points.
        '''

        #print(f'ACQ received thetas {thetas.shape}')
        self.call_count+=1
        sigma_n = self.model.likelihood.noise_covar.noise
        D = self.model.D
        ## does this include theta_i???
        X = self.model.train_inputs[0] 
        x = self.theta_i.view(-1, D)
        variances = []
        
        for theta in thetas:
            
            theta = theta.view(-1, D)

            X_hat = torch.cat([X,theta])
            K_XX = self.model.covar_module(X_hat,X_hat).evaluate() + sigma_n * torch.eye(X_hat.shape[0])
            K_XX_inv = torch.linalg.inv(K_XX)

            # get K_xX_dx
            K_xθ_dx = self._get_KθX_dθ(x, theta)
            K_xX_dx = torch.cat([self.K_xX_dx_part, K_xθ_dx], dim=-1)

            # Compute_variance.
            variance_d = -K_xX_dx @ K_XX_inv @ K_xX_dx.transpose(1, 2)
            variance_d = variance_d.squeeze()
            variances.append(torch.trace(variance_d).view(1))

        return -torch.cat(variances, dim=0)




In [4]:
from botorch.optim import optimize_acqf

gradInfo = GradientInformation(model)
gradInfo.update_theta_i(theta_i)
acq_points,acq_value  = optimize_acqf(
    acq_function=gradInfo,
    bounds=torch.tensor([[0.0] * 6, [1.0] * 6]),
    q=5,
    num_restarts=100,
    raw_samples=128,
    
    options={'nonnegative': True},
)

print("n_calls",gradInfo.call_count)


dist = [torch.mean(torch.abs(pt-theta_i)) for pt in acq_points]
print(dist)

    


n_calls 67
[tensor(0.5829), tensor(0.0744), tensor(0.5760), tensor(0.6410), tensor(0.3222)]
