In [2]:
import torch
import gpytorch
import matplotlib.pyplot as plt
import numpy as np

# Define the true function
def true_function(x):
    return torch.sin(x * (2 * 3.1416))

# Define the data points with a slightly wider range
train_x = torch.linspace(-0.5, 1.5, 100).reshape(-1, 1)
train_y = true_function(train_x) + torch.randn(train_x.size()) * 0.2

# Create a GP model class
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# Initialize likelihood
likelihood = gpytorch.likelihoods.GaussianLikelihood()

# UCB and Thompson Sampling settings
iterations = 10

# UCB strategy
ucb_model = ExactGPModel(train_x[:5], train_y[:5], likelihood)
ucb_model.train()
likelihood.train()
optimizer = torch.optim.Adam(ucb_model.parameters(), lr=0.1)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, ucb_model)

for _ in range(iterations):
    ucb_model.train()
    likelihood.train()
    optimizer.zero_grad()
    output = ucb_model(ucb_model.train_inputs[0])
    loss = -mll(output, ucb_model.train_targets).mean()
    loss.backward()
    optimizer.step()
    
    ucb_model.eval()
    likelihood.eval()
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        observed_pred = likelihood(ucb_model(train_x))
        mean = observed_pred.mean
        lower, upper = observed_pred.confidence_region()

    # UCB action selection
    ucb_action = torch.argmax(mean + 1.96 * (upper - lower) / 2)
    new_x = train_x[ucb_action].reshape(-1, 1)
    new_y = true_function(new_x) + torch.randn(new_x.size()) * 0.2

    # Update training data
    ucb_model.set_train_data(inputs=torch.cat([ucb_model.train_inputs[0], new_x]), targets=torch.cat([ucb_model.train_targets, new_y]), strict=False)

# Thompson Sampling strategy
ts_model = ExactGPModel(train_x[:5], train_y[:5], likelihood)
ts_model.train()
likelihood.train()
optimizer = torch.optim.Adam(ts_model.parameters(), lr=0.1)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, ts_model)

for _ in range(iterations):
    ts_model.train()
    likelihood.train()
    optimizer.zero_grad()
    output = ts_model(ts_model.train_inputs[0])
    loss = -mll(output, ts_model.train_targets).mean()
    loss.backward()
    optimizer.step()
    
    ts_model.eval()
    likelihood.eval()
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        observed_pred = likelihood(ts_model(train_x))
        mean = observed_pred.mean
        lower, upper = observed_pred.confidence_region()

    # Thompson Sampling action selection
    ts_sample = torch.normal(mean, (upper - lower) / 2)
    ts_action = torch.argmax(ts_sample)
    new_x = train_x[ts_action].reshape(-1, 1)
    new_y = true_function(new_x) + torch.randn(new_x.size()) * 0.2

    # Update training data
    ts_model.set_train_data(inputs=torch.cat([ts_model.train_inputs[0], new_x]), targets=torch.cat([ts_model.train_targets, new_y]), strict=False)

# Plotting the results
plt.figure(figsize=(20, 10))

# Plot posterior with UCB
plt.subplot(1, 2, 1)
ucb_model.eval()
likelihood.eval()
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    observed_pred = likelihood(ucb_model(train_x))
    post_mean = observed_pred.mean
    post_lower, post_upper = observed_pred.confidence_region()

plt.plot(train_x.numpy(), true_function(train_x).numpy(), 'k--', label='True Function')
plt.plot(train_x.numpy(), post_mean.numpy(), 'b', label='UCB Mean')
plt.fill_between(train_x.numpy(), post_lower.numpy(), post_upper.numpy(), alpha=0.3)
plt.scatter(ucb_model.train_inputs[0].numpy(), ucb_model.train_targets.numpy(), c='r', marker='x', label='Training Points')
plt.title('Posterior with UCB')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.grid(True)

# Plot posterior with Thompson Sampling
plt.subplot(1, 2, 2)
ts_model.eval()
likelihood.eval()
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    observed_pred = likelihood(ts_model(train_x))
    post_mean = observed_pred.mean
    post_lower, post_upper = observed_pred.confidence_region()

plt.plot(train_x.numpy(), true_function(train_x).numpy(), 'k--', label='True Function')
plt.plot(train_x.numpy(), post_mean.numpy(), 'b', label='TS Mean')
plt.fill_between(train_x.numpy(), post_lower.numpy(), post_upper.numpy(), alpha=0.3)
plt.scatter(ts_model.train_inputs[0].numpy(), ts_model.train_targets.numpy(), c='r', marker='x', label='Training Points')
plt.title('Posterior with Thompson Sampling')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()

RuntimeError: Flattening the training labels failed. The most common cause of this error is that the shapes of the prior mean and the training labels are mismatched. The shape of the train targets is torch.Size([5, 1]), while the reported shape of the mean is torch.Size([5]).