In [78]:
#NN Surrogate model class
from injector_surrogate_quads import *
from custom_priors import *
import physics_gp

sys.path.append('../configs')
#Sim reference point to optimize around
from ref_config import ref_point

#Pytorch 
import numpy as np
import torch
import gpytorch
import botorch 

import matplotlib.pyplot as plt

# BO Comparisons for Minimizing Emittance*Bmag with 9 Variables (SQ, CQ, SOL, matching quads)

In [79]:
# Load the injector surrogate model. `Model` is the module-level handle that the
# objective code further down uses for unit conversion (sim_to_machine), input/output
# column lookup (loc_in / loc_out) and prediction (pred_machine_units).
# NOTE(review): Surrogate_NN is not imported by name in this notebook; presumably it
# comes from the `injector_surrogate_quads` star import — confirm.
Model = Surrogate_NN(pytorch=True)

# Restore the saved network; the path is relative to the notebook's working directory.
Model.load_saved_model(model_path = '../models/', 
                       model_name = 'Injector_Surrogate_NN_PyTorch')

# Load the input (x) and output (y) scaler files stored next to the data.
Model.load_scaling(scalerfilex = '../data/transformer_x_pytorch.pth', 
                   scalerfiley = '../data/transformer_y_pytorch.pth')
# NOTE(review): presumably switches off a log transform of the model outputs — confirm against Surrogate_NN.
Model.take_log_out = False

## Import design Twiss parameters (OTR2)

In [80]:
# Read the beamline description. The context manager closes the file handle
# deterministically instead of leaving it open until garbage collection.
with open('../configs/beamline_info.json') as beamline_file:
    beamline_info = json.load(beamline_file)
get_twiss0 = beamline_info['Twiss0']

# Regroup the flat Twiss0 list per plane as [emit, beta, alpha]:
# x takes entries 0, 2, 4 and y takes entries 1, 3, 5.
twiss0 = {'x': [get_twiss0[0], get_twiss0[2], get_twiss0[4]],
          'y': [get_twiss0[1], get_twiss0[3], get_twiss0[5]]}

# Design beta and alpha per plane; the objective compares the beam against these.
beta0_x, alpha0_x = twiss0['x'][1], twiss0['x'][2]
beta0_y, alpha0_y = twiss0['y'][1], twiss0['y'][2]

## Objective Function

In [81]:
# Convert the simulation reference point to machine units.
# The conversion always starts from the untouched value stored in the ref_config
# module rather than from the notebook-level name `ref_point`: this cell rebinds
# `ref_point`, so converting that name again on a re-run of the cell would apply
# the sim -> machine conversion twice.
import ref_config  # already loaded by the imports cell; re-imported only to reach the unconverted value

ref_point = Model.sim_to_machine(np.asarray(ref_config.ref_point))

# input params: solenoid and quads to vary
opt_var_names = ['SOL1:solenoid_field_scale','CQ01:b1_gradient', 'SQ01:b1_gradient',
                 "QA01:b1_gradient", "QA02:b1_gradient",
                 "QE01:b1_gradient", "QE02:b1_gradient", "QE03:b1_gradient", "QE04:b1_gradient"]

# search-space limits: one [lower, upper] row per entry of opt_var_names, in the same
# order, giving shape (9, 2); consumers that need (2, D) transpose it themselves
bounds = torch.as_tensor([[0.46, 0.485], [-0.02, 0.02], [-0.02, 0.02],
                       [-4, -1], [1, 4],
                       [-7,-1], [-1, 7],[-1, 7], [-7, 1]])

# output params: emittance in transverse plane (x & y)
opt_out_names = ['norm_emit_x','norm_emit_y']

In [82]:
def evaluate(config):
    """
    Score a batch of candidate settings with the surrogate model.

    Every row of the model input starts out as a copy of the reference point; the
    columns that belong to the optimized variables are then overwritten with `config`.

    D is input space dimensionality
    N is number of sample points
    :param config: values for the first D entries of opt_var_names, torch.tensor, shape (N, D)
    :return: negated objective (-emittance*bmag), torch.tensor, shape (N, 1)
    """
    n_points = config.shape[0]
    n_dims = config.shape[1]

    # one row per sample, one column per model input, pre-filled with the reference point
    model_input = torch.empty((n_points, len(Model.model_in_list)))
    model_input[:, :] = torch.tensor(ref_point[0])

    # model-input column of each optimized variable (a flat list of ints, e.g. [4, 6, 7])
    opt_columns = [Model.loc_in[opt_var_names[k]] for k in range(n_dims)]

    # overwrite solenoid, CQ, SQ and matching-quad columns with the proposed values
    model_input[:, opt_columns] = config

    predictions = Model.pred_machine_units(model_input)

    # the optimizer maximizes, so hand back the negated objective
    return -1 * objective(predictions)


def objective(y_out):
    """
    Combine the model outputs into emittance * bmag.

    :param y_out: model outputs, tensor of shape (N, num_outputs)
    :return: emittance * bmag for each row, divided by 1e-6 (reported as um), shape (N, 1)
    """
    def column(name):
        # pick one named output column out of the prediction matrix
        return y_out[:, Model.loc_out[name]]

    def mismatch(beta, beta0, alpha, alpha0):
        # bmag of the measured (beta, alpha) against the design (beta0, alpha0)
        beta_term = 0.5 * ((beta0 / beta) + (beta / beta0))
        alpha_term = 0.5 * ((alpha * torch.sqrt(beta0 / beta) - alpha0 * torch.sqrt(beta / beta0))**2)
        return beta_term + alpha_term

    # transverse emittance: geometric mean of the model's norm_emit_x / norm_emit_y outputs
    emit_x = column('norm_emit_x')
    emit_y = column('norm_emit_y')
    emit = torch.sqrt(emit_x * emit_y)

    # beam sizes
    sigma_x = column('sigma_x')
    sigma_y = column('sigma_y')

    # real beta and alpha
    # TODO: NEEDS TO BE FIXED - the real alpha is currently assumed to equal the design alpha
    n_rows = y_out.shape[0]
    alpha_x = torch.tensor(alpha0_x).repeat(n_rows)
    alpha_y = torch.tensor(alpha0_y).repeat(n_rows)
    beta_x = (sigma_x**2) / emit_x
    beta_y = (sigma_y**2) / emit_y

    # bmag per plane, combined as a geometric mean
    bmag_x = mismatch(beta_x, beta0_x, alpha_x, alpha0_x)
    bmag_y = mismatch(beta_y, beta0_y, alpha_y, alpha0_y)
    bmag = torch.sqrt(bmag_x * bmag_y)

    scaled = (emit * bmag)/1e-6 # in um units
    return scaled.reshape(-1,1)

## Bayesian Optimization

### Gaussian Regression & Acquisition Function

In [83]:
def get_BO_point(x, f, bounds, beta=0.1, mean_module=None, input_transform=None, outcome_transform=None,
                 num_restarts=100, raw_samples=100):
    """
    Train a GP on the observed data and return the next observation point chosen by UCB.

    D is input space dimensionality
    N is number of samples

    :param x: input points data, torch.tensor, shape (N,D)
    :param f: output point data, torch.tensor, shape (N,1)
    :param bounds: input space bounds, torch.tensor, shape (2,D)
    :param beta: UCB optimization parameter, float
    :param mean_module: prior mean module for the GP; None keeps the SingleTaskGP default
    :param input_transform: input transform handed to the GP, or None
    :param outcome_transform: outcome transform handed to the GP, or None
    :param num_restarts: number of restarts used when optimizing the acquisition function
    :param raw_samples: number of raw samples used to initialize the acquisition optimization
    :return candidate, gp: next observation point (one row, since q=1) and the fitted GP model
    """
    gp = botorch.models.SingleTaskGP(x, f,
                                     mean_module=mean_module,
                                     outcome_transform=outcome_transform,
                                     input_transform=input_transform)

    mll = gpytorch.mlls.ExactMarginalLogLikelihood(gp.likelihood, gp)

    # fit GP hyperparameters
    # `fit_gpytorch_model` is the entry point this notebook was written against, so it is
    # used whenever it exists; BoTorch releases that no longer ship it provide
    # `fit_gpytorch_mll` instead, which is used as the fallback.
    fit_mll = getattr(botorch.fit, 'fit_gpytorch_model', None)
    if fit_mll is None:
        fit_mll = botorch.fit.fit_gpytorch_mll
    fit_mll(mll)

    # do UCB acquisition; the acquisition value at the optimum is not needed by callers
    UCB = botorch.acquisition.UpperConfidenceBound(gp, beta=beta)
    candidate, _ = botorch.optim.optimize_acqf(UCB,
                                               bounds=bounds,
                                               q=1,
                                               num_restarts=num_restarts,
                                               raw_samples=raw_samples)
    return candidate, gp

In [84]:
def BayesianOptimization(train_x, train_y, n_steps, beta=2.0, prior=None, transformer_x=None, transformer_y=None):
    """
    Performs BO and returns the best output (-emittance*bmag) per iteration.

    Reads the module-level `bounds` (one [lower, upper] row per variable) for the
    acquisition optimization.

    :param train_x: initial training inputs, shape (N0, D)
    :param train_y: initial training outputs, shape (N0, 1)
    :param n_steps: number of iterations that BO will run for
    :param beta: the beta value used in UCB
    :param prior: the prior mean / mean module for the GP
    :param transformer_x: transformation to be applied to train_x
    :param transformer_y: transformation to be applied to train_y
    :return: (best_y, train_x) where best_y is a 1-D tensor of length n_steps + 1 holding
             the best value seen so far (entry 0 is the best initial sample) and train_x
             holds every evaluated input, shape (N0 + n_steps, D)
    """
    # running best is kept as a 0-dim tensor throughout so the history stacks cleanly
    best_y = torch.max(train_y)
    best_y_list = [best_y]

    for _ in range(n_steps):
        # A new GP is built every step from the same transform object. The transform only
        # re-learns its statistics from the data while in train mode, and evaluating a GP
        # posterior leaves it in eval mode; without this reset the GP would be built with
        # statistics from an earlier, smaller data set whenever no mean module happened
        # to switch the transform back to train mode.
        if transformer_y is not None:
            transformer_y.train()

        x_new, _ = get_BO_point(train_x, train_y,
                                bounds=bounds.transpose(0,1),
                                beta=beta,
                                mean_module=prior,
                                input_transform=transformer_x,
                                outcome_transform=transformer_y)

        # evaluate the proposed point and add it to the data set
        train_x = torch.cat((train_x, x_new))
        new_y = evaluate(train_x[-1].reshape(1,-1)).detach()
        train_y = torch.cat((train_y, new_y))

        best_y = torch.max(best_y, new_y.max())
        best_y_list.append(best_y)

    return torch.stack(best_y_list).detach(), train_x

### Custom Mean Modules

In [85]:
# NN model trained from surrogate model samples as prior
from gpytorch.means.mean import Mean
class CustomMean(Mean):
    """GP prior mean given by a pre-trained neural network whose weights are read from ./results/."""

    def __init__(self, name, prior, NN_y_transform, outcome_transform):
        """
        :param name: file stem of the saved NN weights, read from ./results/<name>.pth
        :param prior: NN module instance that receives the saved weights
        :param NN_y_transform: file stem of the saved output normalization belonging to the NN,
                               read from ./results/<NN_y_transform>.pth
        :param outcome_transform: outcome transform used to map real y to standardized y
        """
        super().__init__()

        # frozen, inference-only network
        self.NN_model = prior
        nn_weights = torch.load('./results/' + name + '.pth')
        self.NN_model.load_state_dict(nn_weights)
        self.NN_model.eval()
        self.NN_model.requires_grad_(False)

        # normalization that maps the NN's normed output back to real y
        self.y_norm_transform = botorch.models.transforms.input.Normalize(1)
        norm_state = torch.load('./results/' + NN_y_transform + '.pth')
        self.y_norm_transform.load_state_dict(norm_state)
        self.y_norm_transform.eval()

        self.outcome_transform = outcome_transform

    def forward(self, x):
        """
        takes in transformed x, returns transformed y

        A 2-D input (n_samples, n_var) is treated as a single batch; otherwise the leading
        dimension is looped over and the per-batch results are stacked along dim 0.
        """
        # eval mode so that applying the outcome transform here uses its stored state
        self.outcome_transform.eval()

        batched = x.unsqueeze(dim=0) if x.dim() == 2 else x

        per_batch = []
        for b in range(batched.shape[0]):
            normed_y = self.NN_model(batched[b].detach()).float()   # normed x |-> NN normed y
            real_y = self.y_norm_transform.untransform(normed_y)    # NN normed y -> real y
            standardized_y = self.outcome_transform(real_y)[0]      # real y -> standardized y
            per_batch.append(standardized_y.squeeze())

        # the transform is always left in train mode; the mode on entry is not restored
        self.outcome_transform.train()
        return torch.stack(per_batch, dim=0)
    
# injector surrogate model as "perfect" prior mean
class Surrogate(Mean):
    """GP prior mean that evaluates the objective (via `evaluate`) directly."""

    def __init__(self, input_transform, outcome_transform):
        """
        :param input_transform: input transform used to map normed x back to real x
        :param outcome_transform: outcome transform used to map real y to standardized y
        """
        super().__init__()
        self.input_transform = input_transform
        self.outcome_transform = outcome_transform

    def forward(self, x):
        """
        takes in transformed x, returns transformed y

        The input is detached before it is passed to `evaluate`. A 2-D input
        (n_samples, n_var) is treated as a single batch; otherwise the leading dimension
        is looped over and the per-batch results are stacked along dim 0.
        """
        # eval mode so that both transforms are applied with their stored state
        self.input_transform.eval()
        self.outcome_transform.eval()

        real_x = self.input_transform.untransform(x) # normed x -> real x
        if real_x.dim() == 2:
            real_x = real_x.unsqueeze(dim=0) # (batch_size = 1, n_samples, n_var)

        per_batch = []
        for b in range(real_x.shape[0]):
            real_y = evaluate(real_x[b].detach()).float()       # real x |-> real y
            standardized_y = self.outcome_transform(real_y)[0]  # real y -> standardized y
            per_batch.append(standardized_y.squeeze())

        # both transforms are always left in train mode; the mode on entry is not restored
        self.input_transform.train()
        self.outcome_transform.train()
        return torch.stack(per_batch, dim=0)

### BO trials

In [86]:
n_var=9
def BO_comparisons(filename, BO_runs, NN_transformers, models, run_BO, n_trials=50, n_steps=30, beta=2.0, n_samples=3):
    """
    runs BOs with various prior means and saves the best -emittance*bmag per iteration and
    input variable changes to a file

    Run types are positional: index 0 uses no prior mean, index 1 uses the injector surrogate
    (`Surrogate`), and index k >= 2 uses the NN prior mean named BO_runs[k] together with
    NN_transformers[k-2] and models[k-2]. Prior means are only constructed for runs that are
    enabled in run_BO, so checkpoint files of disabled runs are never opened.

    Relies on the module-level names `n_var`, `bounds` and `transformer_x`.

    :param filename: name of the .pt file that best_y_lists and x_configs are saved to in the form of a dictionary; String
    :param BO_runs: list of names of the types of runs- constant prior mean, ground truth, and NN model names; list of Strings
    :param NN_transformers: transformer_y used to train each NN model; list of Strings
    :param models: classes for each NN model; list of classes
    :param run_BO: whether or not to run the type of BO; list of booleans, at least as long as BO_runs
    :param n_trials: number of BO comparison trials
    :param n_steps: number of iterations for each BO run
    :param beta: beta value for UCB
    :param n_samples: number of initial training samples
    :returns best_y_lists: for each run type, one entry per completed trial holding whatever
                           BayesianOptimization returned as its best-value history
    :returns x_configs: for each run type, the inputs evaluated during the most recent trial
                        (initial samples followed by one row per BO step); an empty list for
                        run types that never ran
    :raises ValueError: if run_BO has fewer entries than BO_runs
    """
    n_runs = len(BO_runs)
    if len(run_BO) < n_runs:
        # fail before any expensive work instead of with an IndexError in the middle of a trial
        raise ValueError(f'run_BO has {len(run_BO)} entries but BO_runs has {n_runs}')

    def build_prior(run_index, transformer_y_list):
        # prior mean module for one run type (see the positional convention in the docstring)
        if run_index == 0:
            return None
        if run_index == 1:
            return Surrogate(transformer_x, transformer_y_list[1])
        nn_index = run_index - 2
        return CustomMean(BO_runs[run_index], models[nn_index](), NN_transformers[nn_index],
                          transformer_y_list[run_index])

    best_y_lists = [[] for _ in range(n_runs)]
    x_configs = [[] for _ in range(n_runs)]

    for i in range(n_trials):
        # for each trial, new training samples are generated, and each BO run type is executed
        train_x = torch.zeros((n_samples, n_var))
        for j in range(n_var):
            low, high = float(bounds[j,0]), float(bounds[j,1])
            train_x[:,j] = torch.tensor(np.random.uniform(low, high, (n_samples,)))

        train_y = evaluate(train_x).detach().reshape(-1,1)

        # a fresh outcome_transform module per run type, so no state is shared between runs
        transformer_y_list = [botorch.models.transforms.outcome.Standardize(1) for _ in range(n_runs)]

        print(f'iter {i}')

        # run BO with the specified prior means
        for run_index in range(n_runs):
            if run_BO[run_index]:
                print(f'running {BO_runs[run_index]}')
                prior_best_y, prior_x_config = BayesianOptimization(train_x,
                                                            train_y,
                                                            n_steps=n_steps,
                                                            beta=beta,
                                                            prior=build_prior(run_index, transformer_y_list),
                                                            transformer_x=transformer_x,
                                                            transformer_y=transformer_y_list[run_index])
                best_y_lists[run_index].append(prior_best_y)
                x_configs[run_index] = prior_x_config
            # checkpoint everything collected so far as a dictionary in a .pt file
            torch.save({'BO_runs': BO_runs, 'run_BO': run_BO, 'x_configs': x_configs, 'best_y_lists': best_y_lists}, './results/' + filename + '.pt')

    print('finished')
    print('saved data to ' + filename)
    return best_y_lists, x_configs

In [None]:
# setup a BO comparison run
# Position matters in BO_runs: BO_comparisons uses no prior mean for entry 0 and the
# injector surrogate as prior mean for entry 1. Entries 2 and up name the NN prior-mean
# checkpoints (loaded from ./results/<name>.pth) and pair up, in order, with the entries
# of NN_transformers and models below.
BO_runs = ['constant (default)','ground truth', 
           'model3_1hidden_20nodes_500epoch_0.02', 
           'model4_3hidden_30nodes_500epoch_0.01',
           'model5_4hidden_40nodes_1000epoch_0.01_-5'] # specify names of runs (including names of NN prior mean models)

# Input normalization over the optimization bounds (transposed to shape (2, n_var)).
# BO_comparisons reads `transformer_x` as a module-level name, so it has to be defined
# before the call at the bottom of this cell.
transformer_x = botorch.models.transforms.input.Normalize(n_var, bounds = bounds.transpose(0,1))
NN_transformers = ['transformer_y_3^9', 
                   'transformer_y_4^9', 
                   'transformer_y_5^9_-5'] # specify names of transformer_y for each NN prior mean model
# NOTE(review): NN3_prior / NN4_prior / NN5_prior are not imported by name in this notebook;
# presumably they come from the `custom_priors` star import — confirm.
models = [NN3_prior,
          NN4_prior, 
          NN5_prior] # specify NN prior mean model class

# one flag per entry of BO_runs, in the same order; the last run (model5) is skipped here
run_BO = [True, True, True, True, False] # whether or not to run the type of BO

# NOTE(review): the commented-out name below refers to `beta` and `n_steps`, which are not
# defined in the visible notebook cells; they would need values before it can be re-enabled.
# filename = f'surr_const_model3_model4_{beta}_{n_steps}iter'
filename = 'test'

# Long-running: 50 trials, each with 50 BO steps per enabled run type. n_samples is left at
# its default. Results are written to ./results/<filename>.pt.
best_y_lists, x_configs = BO_comparisons(filename=filename,
                                         BO_runs=BO_runs, 
                                         NN_transformers=NN_transformers, 
                                         models=models, 
                                         run_BO=run_BO, 
                                         n_trials=50, 
                                         n_steps=50, 
                                         beta=2.0)