In [1]:
import torch
from botorch.fit import fit_gpytorch_mll
from botorch.acquisition import UpperConfidenceBound
from botorch.optim import optimize_acqf
from gpytorch.mlls import ExactMarginalLogLikelihood

import sys
from path_info import PROJECT_DIR
sys.path.append(PROJECT_DIR)

from src.bnn import BayesianMLPModel

In [9]:
import plotly.graph_objects as go
import numpy as np

# Define the test function
def test_function(x):
    if 0 <= x <= 1:
        return 10 + 10 * (x - 0.5)**2  # High penalty within [0, 1]
    else:
        return (x - 2)**2  # Quadratic function with a minimum at x = 2

# Generate x values
x_values = np.linspace(-1, 3, 400)
y_values = [test_function(x) for x in x_values]

# Create the plot
fig = go.Figure()
fig.add_trace(go.Scatter(x=x_values, y=y_values, mode='lines', name='test_function'))

# Update layout
fig.update_layout(
    title="Plot of the test_function",
    xaxis_title="x",
    yaxis_title="test_function(x)",
    showlegend=True
)

# Show the plot
fig.show()


In [3]:
import torch
import torch.nn as nn
from torch.distributions.normal import Normal
from botorch.models.model import Model
from botorch.acquisition import UpperConfidenceBound
from botorch.optim import optimize_acqf
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.distributions import MultivariateNormal


class BayesianLinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(BayesianLinearRegression, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        # Parameters for the prior distributions of weights and biases
        self.w_mu = nn.Parameter(torch.zeros(input_dim, output_dim))
        self.w_log_sigma = nn.Parameter(torch.zeros(input_dim, output_dim))
        self.b_mu = nn.Parameter(torch.zeros(output_dim))
        self.b_log_sigma = nn.Parameter(torch.zeros(output_dim))
    
    def forward(self, x):
        w_sigma = torch.exp(self.w_log_sigma)
        b_sigma = torch.exp(self.b_log_sigma)
        
        # Sampling weights and biases
        w = self.w_mu + w_sigma * torch.randn_like(self.w_mu)
        b = self.b_mu + b_sigma * torch.randn_like(self.b_mu)
        
        return torch.matmul(x, w) + b
    
    def predict_dist(self, x):
        y = self.forward(x)
        
        # Calculating the uncertainty of the output
        w_sigma = torch.exp(self.w_log_sigma)
        b_sigma = torch.exp(self.b_log_sigma)
        
        # Calculating the standard deviation considering the uncertainty of weights and biases
        output_sigma = torch.sqrt(torch.matmul(x**2, w_sigma**2) + b_sigma**2)
        
        return Normal(y, output_sigma)


class BayesianMLP(nn.Module):
    def __init__(self, input_dim, min_val=None, max_val=None):
        super(BayesianMLP, self).__init__()
        self.hidden1 = nn.Linear(input_dim, 64)
        self.hidden2 = nn.Linear(64, 64)
        self.hidden3 = nn.Linear(64, 64)
        self.relu = nn.ReLU()
        self.bayesian_output = BayesianLinearRegression(64, 1)
        self.min_val = min_val
        self.max_val = max_val
    
    def forward(self, x):
        x = self.relu(self.hidden1(x))
        x = self.relu(self.hidden2(x))
        x = self.relu(self.hidden3(x))
        
        # Get output from Bayesian linear regression
        y_dist = self.bayesian_output.predict_dist(x)

        if self.min_val is not None or self.max_val is not None:
            # Clamp the output
            y_mean = torch.clamp(y_dist.mean, min=self.min_val, max=self.max_val)
        else:
            y_mean = y_dist.mean

        y_stddev = y_dist.stddev  # Keep the standard deviation as it is
        
        # Return new distribution
        return Normal(y_mean, y_stddev)


class BayesianMLPModel(Model):
    def __init__(self, input_dim, min_val=None, max_val=None):
        super().__init__()
        self.bayesian_mlp = BayesianMLP(input_dim, min_val, max_val)
        self.likelihood = GaussianLikelihood()
        self._num_outputs = 1

    def forward(self, x):
        return self.bayesian_mlp(x)
    
    def posterior(self, X, observation_noise=False, **kwargs):
        pred_dist = self.bayesian_mlp(X)
        mean = pred_dist.mean
        covar = torch.diag(pred_dist.stddev**2)
        return MultivariateNormal(mean, covar)
    
    @property
    def num_outputs(self):
        return self._num_outputs
    
    @property
    def train_inputs(self):
        return self._train_inputs

    @property
    def train_targets(self):
        return self._train_targets

    def set_train_data(self, inputs=None, targets=None, strict=True):
        self._train_inputs = inputs
        self._train_targets = targets


# Define the test function
def test_function(x):
    if 0 <= x <= 1:
        return 10 + 10 * (x - 0.5)**2  # High penalty within [0, 1]
    else:
        return (x - 2)**2  # Quadratic function with a minimum at x = 2

# Convert test_function to a PyTorch tensor
def objective(x):
    return torch.tensor([test_function(xi.item()) for xi in x], dtype=torch.float32)


def train_model(model, train_X, train_Y, num_epochs=1000, learning_rate=0.01):
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    model.train()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        output_dist = model(train_X)
        loss = -output_dist.log_prob(train_Y).mean()  # Minimize negative log likelihood
        loss.backward()
        optimizer.step()
    return model


# Create initial data points
train_X = torch.rand(10, 1) * 4  # Generate 10 random initial points in the range [0, 4)
train_Y = torch.tensor([test_function(xi.item()) for xi in train_X], dtype=torch.float32).unsqueeze(-1)

# Optimization loop
for i in range(100):
    # Define and fit the model
    model = BayesianMLPModel(input_dim=1)
    model.set_train_data(train_X, train_Y)
    model = train_model(model, train_X, train_Y)

    # Set the model to evaluation mode
    model.eval()
    
    # Define the acquisition function
    acq_func = UpperConfidenceBound(model, beta=0.1)
    
    # Perform optimization
    bounds = torch.tensor([[0.0], [4.0]])
    candidate, acq_value = optimize_acqf(
        acq_function=acq_func,
        bounds=bounds,
        q=1,
        num_restarts=5,
        raw_samples=20,
    )
    
    print(f"Iteration {i+1}, Optimal candidate: {candidate.item()}")
    
    # Obtain new data point
    new_x = candidate
    new_y = torch.tensor([test_function(new_x.item())], dtype=torch.float32).unsqueeze(-1)
    
    # Update data
    train_X = torch.cat([train_X, new_x.unsqueeze(0)])
    train_Y = torch.cat([train_Y, new_y])


print()
print()
print()
print("hellooooo")
print(train_X[train_Y.argmin().item()])


RuntimeError: diag(): Supports 1D or 2D tensors. Got 3D

In [8]:
import torch
import torch.nn as nn
from torch.distributions.normal import Normal
from botorch.models.model import Model
from botorch.acquisition import UpperConfidenceBound
from botorch.optim import optimize_acqf
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.distributions import MultivariateNormal


class BayesianLinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(BayesianLinearRegression, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        # Parameters for the prior distributions of weights and biases
        self.w_mu = nn.Parameter(torch.zeros(input_dim, output_dim))
        self.w_log_sigma = nn.Parameter(torch.zeros(input_dim, output_dim))
        self.b_mu = nn.Parameter(torch.zeros(output_dim))
        self.b_log_sigma = nn.Parameter(torch.zeros(output_dim))
    
    def forward(self, x):
        w_sigma = torch.exp(self.w_log_sigma)
        b_sigma = torch.exp(self.b_log_sigma)
        
        # Sampling weights and biases
        w = self.w_mu + w_sigma * torch.randn_like(self.w_mu)
        b = self.b_mu + b_sigma * torch.randn_like(self.b_mu)
        
        return torch.matmul(x, w) + b
    
    def predict_dist(self, x):
        y = self.forward(x)
        
        # Calculating the uncertainty of the output
        w_sigma = torch.exp(self.w_log_sigma)
        b_sigma = torch.exp(self.b_log_sigma)
        
        # Calculating the standard deviation considering the uncertainty of weights and biases
        output_sigma = torch.sqrt(torch.matmul(x**2, w_sigma**2) + b_sigma**2)
        
        return Normal(y, output_sigma)


class BayesianMLP(nn.Module):
    def __init__(self, input_dim, min_val=None, max_val=None):
        super(BayesianMLP, self).__init__()
        self.hidden1 = nn.Linear(input_dim, 64)
        self.hidden2 = nn.Linear(64, 64)
        self.hidden3 = nn.Linear(64, 64)
        self.relu = nn.ReLU()
        self.bayesian_output = BayesianLinearRegression(64, 1)
        self.min_val = min_val
        self.max_val = max_val
    
    def forward(self, x):
        x = self.relu(self.hidden1(x))
        x = self.relu(self.hidden2(x))
        x = self.relu(self.hidden3(x))
        
        # Get output from Bayesian linear regression
        y_dist = self.bayesian_output.predict_dist(x)

        if self.min_val is not None or self.max_val is not None:
            # Clamp the output
            y_mean = torch.clamp(y_dist.mean, min=self.min_val, max=self.max_val)
        else:
            y_mean = y_dist.mean

        y_stddev = y_dist.stddev  # Keep the standard deviation as it is
        
        # Return new distribution
        return Normal(y_mean, y_stddev)


class BayesianMLPModel(Model):
    def __init__(self, input_dim, min_val=None, max_val=None):
        super().__init__()
        self.bayesian_mlp = BayesianMLP(input_dim, min_val, max_val)
        self.likelihood = GaussianLikelihood()
        self._num_outputs = 1

    def forward(self, x):
        return self.bayesian_mlp(x)
    
    def posterior(self, X, observation_noise=False, **kwargs):
        pred_dist = self.bayesian_mlp(X)
        mean = pred_dist.mean.squeeze(-1)  # Ensure mean is 2D
        stddev = pred_dist.stddev.squeeze(-1)  # Ensure stddev is 2D
        covar = torch.diag_embed(stddev**2)
        return MultivariateNormal(mean, covar)
    
    @property
    def num_outputs(self):
        return self._num_outputs
    
    @property
    def train_inputs(self):
        return self._train_inputs

    @property
    def train_targets(self):
        return self._train_targets

    def set_train_data(self, inputs=None, targets=None, strict=True):
        self._train_inputs = inputs
        self._train_targets = targets


# Define the test function
def test_function(x):
    if 0 <= x <= 1:
        return 10 + 10 * (x - 0.5)**2  # High penalty within [0, 1]
    else:
        return (x - 2)**2  # Quadratic function with a minimum at x = 2

# Convert test_function to a PyTorch tensor
def objective(x):
    return torch.tensor([test_function(xi.item()) for xi in x], dtype=torch.float32)


def train_model(model, train_X, train_Y, num_epochs=1000, learning_rate=0.01):
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    model.train()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        output_dist = model(train_X)
        loss = -output_dist.log_prob(train_Y).mean()  # Minimize negative log likelihood
        loss.backward()
        optimizer.step()
    return model


# Create initial data points
train_X = torch.rand(10, 1) * 4  # Generate 10 random initial points in the range [0, 4)
train_Y = torch.tensor([test_function(xi.item()) for xi in train_X], dtype=torch.float32).unsqueeze(-1)

# Optimization loop
for i in range(100):
    # Define and fit the model
    model = BayesianMLPModel(input_dim=1)
    model.set_train_data(train_X, train_Y)
    model = train_model(model, train_X, train_Y)

    # Set the model to evaluation mode
    model.eval()
    
    # Define the acquisition function
    acq_func = UpperConfidenceBound(model, beta=0.1)
    
    # Perform optimization
    bounds = torch.tensor([[0.0], [4.0]])
    candidate, acq_value = optimize_acqf(
        acq_function=acq_func,
        bounds=bounds,
        q=1,
        num_restarts=5,
        raw_samples=20,
    )
    
    print(f"Iteration {i+1}, Optimal candidate: {candidate.item()}")
    
    # Obtain new data point
    new_x = candidate
    new_y = torch.tensor([test_function(new_x.item())], dtype=torch.float32).unsqueeze(-1)
    
    # Update data
    train_X = torch.cat([train_X, new_x])
    train_Y = torch.cat([train_Y, new_y])


print()
print()
print()
print("hellooooo")
print(train_X[train_Y.argmin().item()])


Iteration 1, Optimal candidate: 0.0
Iteration 2, Optimal candidate: 0.0
Iteration 3, Optimal candidate: 0.0
Iteration 4, Optimal candidate: 0.0
Iteration 5, Optimal candidate: 0.0
Iteration 6, Optimal candidate: 0.0
Iteration 7, Optimal candidate: 0.0
Iteration 8, Optimal candidate: 0.0
Iteration 9, Optimal candidate: 0.0
Iteration 10, Optimal candidate: 0.0
Iteration 11, Optimal candidate: 0.080141082406044
Iteration 12, Optimal candidate: 0.0
Iteration 13, Optimal candidate: 0.12079741060733795
Iteration 14, Optimal candidate: 0.0
Iteration 15, Optimal candidate: 0.17396406829357147
Iteration 16, Optimal candidate: 0.0
Iteration 17, Optimal candidate: 0.18600159883499146
Iteration 18, Optimal candidate: 0.0
Iteration 19, Optimal candidate: 0.0
Iteration 20, Optimal candidate: 0.0
Iteration 21, Optimal candidate: 0.0
Iteration 22, Optimal candidate: 0.0
Iteration 23, Optimal candidate: 0.0
Iteration 24, Optimal candidate: 0.8540904521942139
Iteration 25, Optimal candidate: 0.186233282