Bayesian Optimizer from ChatGPT as reference for a working model suggests next experiment variables to explore

In [2]:
import numpy as np
from scipy.optimize import minimize
from scipy.stats import qmc
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern

class BayesianOptimizer:
    def __init__(self, bounds, is_categorical, batch_size=3, beta=2.0):
        self.bounds = np.array(bounds)  # [(low, high), ...] for each dimension
        self.is_categorical = np.array(is_categorical)  # Boolean mask for categorical variables
        self.batch_size = batch_size
        self.beta = beta  # Controls exploration vs. exploitation
        
        # Define GP model with Matern kernel
        self.kernel = Matern(length_scale=1.0, nu=2.5)
        self.gp = GaussianProcessRegressor(kernel=self.kernel, alpha=1e-6, normalize_y=True)

        # Store observed data
        self.X_train = None
        self.y_train = None

    def add_observations(self, X_new, y_new):
        """Update the dataset with new observations."""
        if self.X_train is None:
            self.X_train = np.array(X_new)
            self.y_train = np.array(y_new)
        else:
            self.X_train = np.vstack((self.X_train, X_new))
            self.y_train = np.append(self.y_train, y_new)
        self.gp.fit(self.X_train, self.y_train)  # Retrain GP

    def ucb_acquisition(self, X):
        """Upper Confidence Bound (UCB) acquisition function."""
        mean, std = self.gp.predict(X, return_std=True)
        return mean + self.beta * std  # Encourages exploration & exploitation

    def optimize_acquisition(self):
        """Finds the next experiment to run using different strategies for continuous & categorical variables."""
        if np.any(self.is_categorical):
            # Latin Hypercube Sampling (LHS) for categorical variables
            sampler = qmc.LatinHypercube(d=len(self.bounds))
            sample_points = qmc.scale(sampler.random(n=10000), self.bounds[:, 0], self.bounds[:, 1])
            best_idx = np.argmax(self.ucb_acquisition(sample_points))
            return sample_points[best_idx]
        else:
            # Use LBFGS for continuous optimization
            best_x = None
            best_value = -np.inf
            for _ in range(10):  # Multi-start optimization
                x0 = np.random.uniform(self.bounds[:, 0], self.bounds[:, 1])
                res = minimize(lambda x: -self.ucb_acquisition(x.reshape(1, -1)), x0, bounds=self.bounds, method="L-BFGS-B")
                if res.fun < best_value:
                    best_value = res.fun
                    best_x = res.x
            return best_x

    def batch_selection(self):
        """Select multiple experiments using the 'constant liar' approach."""
        selected_points = []
        for _ in range(self.batch_size):
            next_x = self.optimize_acquisition()
            selected_points.append(next_x)
            
            # "Lying" step: Assume a mean value for the next point before real data comes in
            fake_y = self.gp.predict(next_x.reshape(1, -1)).mean()
            self.add_observations(next_x.reshape(1, -1), fake_y)
        
        return np.array(selected_points)

# Example usage
bounds = [(0, 10), (0, 5)]  # Example bounds for 2 variables
is_categorical = [False, True]  # First variable is continuous, second is categorical

bo = BayesianOptimizer(bounds, is_categorical)

# Assume we already have some observations
X_initial = np.array([[2, 1], [4, 0], [6, 1]])  # Example (continuous, categorical)
y_initial = np.array([0.5, 1.2, 0.8])  # Example target values
bo.add_observations(X_initial, y_initial)

# Get the next batch of experiments
next_experiments = bo.batch_selection()
print("Next batch of experiments:", next_experiments)


Next batch of experiments: [[4.23887752e+00 3.96728088e-03]
 [4.21119894e+00 7.70948507e-03]
 [5.25865176e+00 4.50278376e+00]]


