In [None]:
#NN Surrogate model class
from injector_surrogate_quads import *
import physics_gp
import json
import os
import sys

sys.path.append('../configs')
#Sim reference point to optimize around
from ref_config import ref_point

#Pytorch 
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import gpytorch
import botorch 

import matplotlib.pyplot as plt

# Seed PyTorch's global random number generator so that the torch.randn draws used
# further down (initial polynomial coefficients) are repeatable between runs.
random_seed = 1
torch.manual_seed(random_seed)


<torch._C.Generator at 0x7f51d051ae70>

# BO with Expressive Priors - 2nd Order Polynomial
### BO Minimizes Emittance*Bmag with 9 Variables (SQ, CQ, SOL, matching quads)

In [None]:
# Build the PyTorch variant of the injector surrogate, then restore its saved
# weights and the input/output scalers from disk.
Model = Surrogate_NN(pytorch=True)

Model.load_saved_model(
    model_path='../models/',
    model_name='Surrogate_NN_PyTorch',
)

Model.load_scaling(
    scalerfilex='../data/transformer_x_pytorch.pth',
    scalerfiley='../data/transformer_y_pytorch.pth',
)

# NOTE(review): presumably disables a log transform on the model outputs -- confirm in Surrogate_NN
Model.take_log_out = False

2022-06-24 14:23:41.676866: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# design Twiss parameters
# Read the beamline description inside a `with` block so the file handle is closed
# as soon as the JSON has been parsed.
with open('../configs/beamline_info.json') as beamline_file:
    beamline_info = json.load(beamline_file)
get_twiss0 = beamline_info['Twiss0']

# emit, beta, alpha
# 'Twiss0' is indexed as x/y pairs: entries 0-1, 2-3 and 4-5 feed the three slots
# of each plane in the order given by the comment above.
twiss0 = {'x': [get_twiss0[0], get_twiss0[2], get_twiss0[4]],
          'y': [get_twiss0[1], get_twiss0[3], get_twiss0[5]]}

# design beta / alpha per plane, used later by objective() for the Bmag term
beta0_x, alpha0_x = twiss0['x'][1], twiss0['x'][2]
beta0_y, alpha0_y = twiss0['y'][1], twiss0['y'][2]
# print(twiss0['x'])
# print(twiss0['y'])

## Objective Function

In [None]:
# convert to machine units
# The simulation-unit point is re-imported under its own name and copied before the
# conversion, so re-running this cell always starts from the original values instead
# of converting an already converted `ref_point` a second time.
from ref_config import ref_point as ref_point_sim
ref_point = Model.sim_to_machine(np.array(ref_point_sim))

# input params: solenoid and quads to vary 
opt_var_names = ['SOL1:solenoid_field_scale','CQ01:b1_gradient', 'SQ01:b1_gradient',
                 "QA01:b1_gradient", "QA02:b1_gradient", 
                 "QE01:b1_gradient", "QE02:b1_gradient", "QE03:b1_gradient", "QE04:b1_gradient"]
# one [lower, upper] row per entry of opt_var_names, in the same order
bounds = torch.as_tensor([[0.46, 0.485], [-0.02, 0.02], [-0.02, 0.02],
                       [-4, -1], [1, 4],
                       [-7,-1], [-1, 7],[-1, 7], [-7, 1]])

# output params: emittance in transverse plane (x & y)
opt_out_names = ['norm_emit_x','norm_emit_y']

In [None]:
def evaluate(config):
    """
    Run the surrogate model on a batch of candidate settings and return the
    negated objective (so that maximising this value minimises the objective).

    D is input space dimensionality
    N is number of sample points
    :param config: values for the first D entries of opt_var_names, torch.tensor, shape (N, D)
    returns tensor of shape (N, 1)
    """
    n_points, n_dims = config.shape[0], config.shape[1]

    # every row of the model input starts out as the reference point
    x_in = torch.empty((n_points, len(Model.model_in_list)))
    x_in[:, :] = torch.tensor(ref_point[0])

    # model-input column of each optimised variable, in opt_var_names order
    col = [Model.loc_in[opt_var_names[k]] for k in range(n_dims)]

    # overwrite those columns with the values proposed by the optimisation step
    x_in[:, col] = config[:, :]

    # surrogate prediction in machine units, then negated objective
    y_out = Model.pred_machine_units(x_in)
    return -1 * objective(y_out)


def objective(y_out):
    """
    Emittance * Bmag figure of merit computed from surrogate-model outputs.

    :param y_out: tensor of model outputs, shape (N, num_outputs); columns are
                  looked up by name through Model.loc_out
    returns tensor of emittance * bmag for each input, shape (N, 1)
    """
    n_points = y_out.shape[0]

    def _column(name):
        # pull one named output column out of the prediction tensor
        return y_out[:, Model.loc_out[name]]

    def _bmag(beta, beta0, alpha, alpha0):
        # mismatch factor of (beta, alpha) relative to the design (beta0, alpha0)
        beta_term = 0.5 * ((beta0 / beta) + (beta / beta0))
        alpha_term = 0.5 * ((alpha * torch.sqrt(beta0 / beta) - alpha0 * torch.sqrt(beta / beta0))**2)
        return beta_term + alpha_term

    emit_x = _column('norm_emit_x')
    emit_y = _column('norm_emit_y')
    sigma_x = _column('sigma_x')
    sigma_y = _column('sigma_y')

    # geometric mean of the two transverse emittances
    emit = torch.sqrt(emit_x * emit_y)

    # real beta and alpha
    # NEEDS TO BE FIXED - currently assuming real alpha to be the same as design alpha
    alpha_x = torch.tensor(alpha0_x).repeat(n_points)
    alpha_y = torch.tensor(alpha0_y).repeat(n_points)
    beta_x = (sigma_x**2) / emit_x
    beta_y = (sigma_y**2) / emit_y

    # combine both planes into a single bmag value per sample
    bmag_x = _bmag(beta_x, beta0_x, alpha_x, alpha0_x)
    bmag_y = _bmag(beta_y, beta0_y, alpha_y, alpha0_y)
    bmag = torch.sqrt(bmag_x * bmag_y)

    # divide by 1e-6 (in um units, per the original note) and return as a column
    scaled = (emit * bmag)/1e-6
    return scaled.reshape(-1,1)

In [None]:
# mesh grid of n_samples_per_var^n_var points to sample (3^9 = 19683 with the values below)
n_samples_per_var = 3
n_var = 9

filename = f'grid{n_samples_per_var}^{n_var}.pt'
results_dir = './results/'
grid_path = results_dir + filename

# create x and y if they are not saved in the grid file, otherwise load x and y from it.
# The file that is loaded is the same one whose existence/size is checked
# (previously the check looked at ./results/<filename> but the load read 'grid.pt').
if os.path.exists(grid_path) and os.stat(grid_path).st_size > 0: 
    training_dict = torch.load(grid_path)
    x = training_dict['x']
    y = training_dict['y']
else: 
    var_points = torch.zeros((n_var, n_samples_per_var)) 
    # take n_samples_per_var evenly spaced points between the bounds of each dimension i
    for i in range(n_var):
        var_points[i,:] = torch.linspace(bounds[i,0],bounds[i,1],n_samples_per_var)

    # generate grid of points to sample: one row per grid point after the transpose
    grid = np.array(np.meshgrid(*var_points)).reshape(n_var,-1)
    x = (torch.tensor(grid).t())
    x_data = torch.utils.data.TensorDataset(x)
    # a single batch holding the whole grid; size follows the grid settings above
    x_loader = torch.utils.data.DataLoader(x_data, batch_size=n_samples_per_var**n_var, shuffle=False)
    
    y = torch.cat([evaluate(x_sample[0]).detach() for x_sample in x_loader])

    # make sure the output directory exists before writing the cache
    os.makedirs(results_dir, exist_ok=True)
    torch.save({'x': x, 'y': y}, grid_path)

print(x, y)

tensor([[ 0.4600, -0.0200, -0.0200,  ..., -1.0000, -1.0000, -7.0000],
        [ 0.4600, -0.0200, -0.0200,  ..., -1.0000, -1.0000, -3.0000],
        [ 0.4600, -0.0200, -0.0200,  ..., -1.0000, -1.0000,  1.0000],
        ...,
        [ 0.4850,  0.0200,  0.0200,  ...,  7.0000,  7.0000, -7.0000],
        [ 0.4850,  0.0200,  0.0200,  ...,  7.0000,  7.0000, -3.0000],
        [ 0.4850,  0.0200,  0.0200,  ...,  7.0000,  7.0000,  1.0000]])
torch.Size([19683, 9])
tensor([[ -6.9780],
        [-10.3164],
        [ -5.1626],
        ...,
        [ -4.3536],
        [ -6.9199],
        [ -7.3886]], dtype=torch.float64)


## Fit data with second order polynomial

In [None]:
# f(x) = x^T A x + B x + C
# x is a 9x1 column vector, A is a symmetric 9x9 matrix, B is a 1x9 vector, C is a scalar.

# Random symmetric start for A: draw the upper-triangle entries (diagonal included)
# once and mirror them into the lower triangle.
a_vals = torch.randn((int(n_var*(n_var+1)/2),1)).reshape(1,-1)
A = torch.zeros(n_var, n_var)
tri_rows, tri_cols = torch.triu_indices(n_var, n_var)
A[tri_rows, tri_cols] = a_vals
A.T[tri_rows, tri_cols] = a_vals
A = A.clone().detach().requires_grad_(True)

B = torch.randn((1, n_var), requires_grad=True)

C = torch.randn((1,1), requires_grad = True)

# Inputs cast to float32 to match A/B/C (polynomial_model applies the same cast).
x_fit = x.float()
# y is stored as a column (N, 1). Flatten it so the residual below is element-wise (N,);
# subtracting an (N, 1) column from an (N,) vector would broadcast to an (N, N) matrix
# and the mean would no longer be the mean squared error.
y_target = y.reshape(-1)

learning_rate = 1e-4
n_epochs = 2000
for epoch in range(n_epochs):
    # batched evaluation of x^T A x + B x + C for every row of x_fit, shape (N,)
    y_pred = ((x_fit @ A) * x_fit).sum(dim=1) + (x_fit @ B.T).reshape(-1) + C.reshape(-1)

    # mean squared error over the N grid points
    loss = (y_pred - y_target).pow(2).mean()
    if (epoch % 50 == 0):
        print(epoch, loss.item())
        print(y_pred)
    loss.backward()

    # plain gradient-descent step on all three coefficient tensors
    with torch.no_grad():
        A -= learning_rate * A.grad
        B -= learning_rate * B.grad
        C -= learning_rate * C.grad

        # Manually zero the gradients after updating weights
        A.grad = None
        B.grad = None
        C.grad = None

print(f'Result: y = x{A}X^T + {B} x + {C.item()}')
# y_pred is detached so the saved file holds plain values rather than a graph node
torch.save({'x': x, 'y': y, 'A': A, 'B': B, 'C': C, 'y_pred': y_pred.detach()}, f'polynomial_{filename}')

tensor([[-1.5256, -0.7502, -0.6540, -1.6095, -0.1002, -0.6092, -0.9798, -1.6091,
         -0.7121],
        [-0.7502,  0.3037, -0.7773, -0.2515, -0.2223,  1.6871,  0.2284,  0.4676,
         -0.6970],
        [-0.6540, -0.7773, -1.1608,  0.6995,  0.1991,  0.8657,  0.2444, -0.6629,
          0.8073],
        [-1.6095, -0.2515,  0.6995,  1.1017, -0.1759, -2.2456, -1.4465,  0.0612,
         -1.2150],
        [-0.1002, -0.2223,  0.1991, -0.1759,  0.7312,  1.1718, -0.9274,  0.5451,
          0.0663],
        [-0.6092,  1.6871,  0.8657, -2.2456,  1.1718, -0.4370,  0.7626,  1.1633,
         -0.0091],
        [-0.9798,  0.2284,  0.2444, -1.4465, -0.9274,  0.7626, -0.8425,  0.1374,
          0.9386],
        [-1.6091,  0.4676, -0.6629,  0.0612,  0.5451,  1.1633,  0.1374, -0.1860,
         -0.6446],
        [-0.7121, -0.6970,  0.8073, -1.2150,  0.0663, -0.0091,  0.9386, -0.6446,
          1.5392]], requires_grad=True)
0 20197.07253819996
tensor([-111.2910, -137.2792, -114.0115,  ...,  -37.7810,  

## Scan

In [None]:
# models
# Reload the grid data and the fitted polynomial coefficients written by the fit cell.
training_dict = torch.load(f'polynomial_{filename}')
x, y = training_dict['x'], training_dict['y']
A, B, C = training_dict['A'], training_dict['B'], training_dict['C']
y_pred = training_dict['y_pred']

def polynomial_model(x):
    """
    Evaluate the fitted second-order polynomial x^T A x + B x + C for a batch of points,
    using the module-level coefficients A (num_var, num_var), B (1, num_var) and C (1, 1).

    :param x: input values, tensor, shape (num_samples, num_var)
    returns y_pred as a 1-D tensor, shape (num_samples,)
    """
    x = x.float()
    # row-wise quadratic form: sum_ij x_i A_ij x_j for every sample at once
    quadratic = ((x @ A) * x).sum(dim=1)
    linear = (x @ B.T).reshape(-1)
    return quadratic + linear + C.reshape(-1)


In [None]:

# use ref point values for the parameters that are kept constant 
scan_ref_point = torch.cat([torch.as_tensor([ref_point[0][Model.loc_in[param_name]]]) for param_name in opt_var_names])

plt.rcParams["figure.figsize"] = (15,5)
# plot loss
# `valid_losses` is not produced by any cell of this notebook, so the curve is drawn
# only when such a list is already present in the session; otherwise the cell would
# stop with a NameError on a fresh kernel.
if 'valid_losses' in globals():
    i = torch.arange(len(valid_losses))
    plt.plot(i, valid_losses, label = "valid")
    plt.legend()
    plt.show()
 
def scan(model_param, num_scans, model_prior):
    """
    scans values across a single parameter of the model, keeping others constant
    at scan_ref_point

    :param model_param: name of the model param that is being scanned, must be in opt_var_names
    :param num_scans: number of evenly spaced values scanned between the parameter's bounds
    :param model_prior: callable taking a float tensor of shape (num_scans, num_var) and
                        returning the prior prediction for each row
    returns (model_test_y, prior_test_y, test_x): surrogate values as a detached 1-D tensor
    of length num_scans, the prior's output, and the scanned inputs (num_scans, num_var)
    """ 
    param_index = opt_var_names.index(model_param)

    # every row starts as the reference point; only the scanned column is varied
    test_x = scan_ref_point.repeat(num_scans, 1) 
    test_x[:,param_index] = torch.linspace(bounds[param_index, 0], bounds[param_index, 1], num_scans)

    # evaluate() needs a 2-D (N, D) batch (it reads config.shape[1]), so the whole scan
    # is passed in one call instead of one 1-D row at a time
    model_test_y = evaluate(test_x).detach().reshape(-1)
    prior_test_y = model_prior(test_x.float()) 
    return model_test_y, prior_test_y, test_x

# One panel per optimised variable: surrogate model vs. the fitted polynomial prior
# along a 30-point scan of that variable, all other variables held at the reference point.
plt.rcParams["figure.figsize"] = (15,35)
for i, model_param in enumerate(opt_var_names):
    # the prior available in this notebook is polynomial_model (no NN_model is defined here)
    model_test_y, prior_test_y, test_x = scan(model_param, 30, polynomial_model) 
    plt.subplot(9, 2, i+1)
    plt.xlabel(model_param)
    plt.ylabel('emmitance*bmag')
    # column i of test_x is the scanned variable, since i follows opt_var_names order
    plt.plot(test_x[:, i], model_test_y, label = "model_y")
    plt.plot(test_x[:, i], prior_test_y.detach(), label = "polynomial_prior_y")
    plt.legend()
    
plt.show()
