# Import Libraries

In [66]:
# import libraries 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch 
from botorch.models import SingleTaskGP
from gpytorch.mlls import ExactMarginalLogLikelihood
from botorch.acquisition import ExpectedImprovement
from gpytorch.kernels import MaternKernel
from botorch.optim import optimize_acqf
import torch.optim as optim
from botorch.fit import fit_gpytorch_mll
from sklearn import preprocessing

# Load the needed dataset 

In [68]:
# load the AgNP dataset
data = pd.read_csv('datasets/AgNP_dataset.csv')
# merge rows that have the same values 
data = data.groupby(data.columns.tolist()).size().reset_index().rename(columns={0:'count'})
# for each of the rows with the same values merge them and take the average of the loss column
data = data.groupby(['QAgNO3(%)', 'Qpva(%)', 'Qtsc(%)', 'Qseed(%)', 'Qtot(uL/min)'], as_index=False).mean().reset_index()
# drop the count column 
data = data.drop(columns=['count', 'index'])

X = data[['QAgNO3(%)', 'Qpva(%)', 'Qtsc(%)', 'Qseed(%)', 'Qtot(uL/min)']]
# convert the input and output columns to tensors
X = torch.tensor(X.values, dtype=torch.float)
y = torch.tensor(data['loss'].values, dtype=torch.float32).unsqueeze(1) 


# standardize the input tensor 
Scaler = preprocessing.StandardScaler()
Scaler.fit(X)

# set the bounds for the input columns
min1 = X[:,0].min()
max1 = X[:,0].max()
min2 = X[:,1].min()
max2 = X[:,1].max()
min3 = X[:,2].min()
max3 = X[:,2].max()
min4 = X[:,3].min()
max4 = X[:,3].max()
min5 = X[:,4].min()
max5 = X[:,4].max()
bounds = torch.tensor([[min1, min2, min3, min4, min5], [max1, max2, max3, max4, max5]], dtype=torch.float)

# Initialize the surrogate model, kernel, and acquisition function

In [40]:
# define the model
model = SingleTaskGP(X, y)
mll = ExactMarginalLogLikelihood(model.likelihood, model)
# fit the model
fit_gpytorch_mll(mll)

# set up a train gp function which will loop 100 times to best fit the data



  model = SingleTaskGP(X, y)


ExactMarginalLogLikelihood(
  (likelihood): GaussianLikelihood(
    (noise_covar): HomoskedasticNoise(
      (noise_prior): GammaPrior()
      (raw_noise_constraint): GreaterThan(1.000E-04)
    )
  )
  (model): SingleTaskGP(
    (likelihood): GaussianLikelihood(
      (noise_covar): HomoskedasticNoise(
        (noise_prior): GammaPrior()
        (raw_noise_constraint): GreaterThan(1.000E-04)
      )
    )
    (mean_module): ConstantMean()
    (covar_module): ScaleKernel(
      (base_kernel): MaternKernel(
        (lengthscale_prior): GammaPrior()
        (raw_lengthscale_constraint): Positive()
      )
      (outputscale_prior): GammaPrior()
      (raw_outputscale_constraint): Positive()
    )
  )
)

# Optimize the model using BO 

In [37]:
# optimize the acquisition function using Bayesian Optimization with botorch

# Define the number of iterations and initial points
N = 10
n = 5

# define the bounds of the input columns using the minimum value from the X dataset
bounds = torch.tensor([[X[:, 0].min(),X[:, 1].min(),X[:, 2].min(),X[:, 3].min(),X[:, 4].min()], [X[:, 0].max(),X[:, 1].max(),X[:, 2].max(),X[:, 3].max(),X[:, 4].max()]])
# define the best value of the output column
best_value = y.min()
# define the number of random restarts
num_restarts = 5
# define the number of iterations for the optimization
num_steps = 100

# define the initial points
X_init = bounds[0] + (bounds[1] - bounds[0]) * torch.rand(n, 5)

# optimize the acquisition function and build a list of optimized points (get 100)


# plot the original dataset using the loss value as y and index value as x then overlay the optimized points
plt.scatter(range(len(y)), y, label='Original Dataset')
plt.scatter(X_opt[:, 0], best_value, label='Optimized Points')
plt.legend()
plt.show()


RuntimeError: Tensors must have same number of dimensions: got 2 and 3