In [2]:
import torch
import numpy as np
from bbob_test_functions import *
import time

device = 'cpu'
# set default to type double 
torch.set_default_dtype(torch.float64)

In [3]:
def compute_RGF_gradient(x, f, delta=1e-1, int_samples=100, device='cpu'):
  '''
      compute the RGF gradient.
      input is a single vector x of size (dim x 1)
  '''
  assert(x.shape[1]==1)
  assert(x.shape[0]>=1)

  dim = x.shape[0]

  standard_dev = 1.0

  y = standard_dev * torch.randn(int_samples, dim, device=device)# here y has shape (n_samples x dim)

  x = x.view(1, dim)

  f_x = f(x); 
  f_x = f_x.view(1,1)
  update = x + delta*y
  assert update.shape == (int_samples, dim)
  f_delta = f(update)
  f_delta = f_delta.view(int_samples, 1)

  assert f_delta.shape == (int_samples, 1)

  diff_term = y * (f_delta - f_x)/delta
  assert diff_term.shape == (int_samples, dim)

  grad = torch.mean(diff_term, dim=0).view(-1,1)
  assert grad.shape == (dim,1)

  return grad

In [4]:
# function_name = 'sphere_function'
# function_name = 'ellipsoidal_function'
# function_name = 'rastrigin_function'
# function_name = 'bueche_rastrigin_function'
# function_name = 'attractive_sector_function'
# function_name = 'rosenbrock_function'
# function_name = 'rotated_rosenbrock_function'
# function_name = 'discus_function'
# function_name = 'bent_cigar_function'
# function_name = 'sharp_ridge_function'
# function_name = 'different_powers_function'
function_name = 'weierstrass_function'
# function_name = 'schaffers_f7_function'
# function_name = 'schaffers_f7_moderately_ill_cond_function'
# function_name = 'composite_griewank_rosenbrock_function'
# function_name = 'schwefel_function'


dim = 10
x_true = torch.zeros(dim, device=device)
if function_name == 'schwefel_function':
    one_plus_minus = torch.ones(dim, device=device)
    one_plus_minus[1::2] = -1

    x_true = 4.2096874633/2 * one_plus_minus


x0 = 4*torch.ones(dim, 1, device=device)
n_trials = 3
max_iters = int(1e4)

# define function and choose dimension
def f(x, return_gradient=False):
    # return sphere_function(x)
    # turn function_name string into pytorch function

    if return_gradient==True:
        x.requires_grad = True
    
    fx = eval(function_name)(x)
    if return_gradient==True:
        grad_fx = torch.autograd.grad(outputs=fx, inputs=x, grad_outputs=torch.ones_like(fx), create_graph=True)[0]
        x.requires_grad = False
        fx = fx.detach()
        grad_fx = grad_fx.detach()
        return fx, grad_fx
    else:
        x.requires_grad = False
        return fx

### Run RGF Nesterov

In [6]:
# ---------------------------------------------------------------------------------------------------
# Proximal Point using Laplace's approximation
# ---------------------------------------------------------------------------------------------------
def RGF_Nesterov(x0, f, max_iters, step_size, x_true=x_true, int_samples=100, delta=1e-1, verbose=True, print_freq=1, device='cpu'):

  assert len(x0.shape)==2 and x0.shape[1]==1
  xk = x0.clone()
  fk_hist = torch.zeros(max_iters)
  rel_err_hist = torch.zeros(max_iters)
  time_hist = torch.zeros(max_iters)
  grad_norm_hist = torch.zeros(max_iters)

  for i in range(max_iters):

    start_time = time.time()

    fk, grad_fk = f(xk.permute(1,0), return_gradient=True) # need to input xk with dimensions (n_samples x 1)
    grad_norm = torch.norm(grad_fk)

    rel_err = torch.norm(xk - x_true)

    rgf_grad = compute_RGF_gradient(xk, f, delta=delta, int_samples=int_samples, device=device)

    x_new = xk - step_size*rgf_grad


    fk_hist[i] = fk.cpu()
    rel_err_hist[i] = rel_err.cpu()
    grad_norm_hist[i] = grad_norm.cpu()

    end_time = time.time()
    iter_time = end_time - start_time

    time_hist[i] = iter_time

    if verbose:
      if (i+1)%print_freq == 0:
        print('iter: ', (i+1), ' fk: ', "{:5.2e}".format(fk.item()),
              ' rel_err: ', "{:5.2e}".format(rel_err.item()),
              ' grad_norm:' , "{:5.2e}".format(grad_norm.item()),
              ' time = ', '{:5.2f}'.format(iter_time))

    xk = x_new

  return xk, fk_hist, rel_err_hist, grad_norm_hist

In [8]:
delta_array = [1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2]
samples_array = [1e1, 1e2, 1e3, 1e4]
step_size_array = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0]

f_hist_array = torch.zeros(n_trials, len(delta_array), len(samples_array), max_iters, len(step_size_array))
rel_err_array = torch.zeros(n_trials, len(delta_array), len(samples_array), max_iters, len(step_size_array))
grad_norm_hist_array = torch.zeros(n_trials, len(delta_array), len(samples_array), max_iters, len(step_size_array))

for l in range(len(step_size_array)):

  step_size = step_size_array[l]
  print('step_size = ', step_size)

  for i in range(n_trials):

    for j in range(len(delta_array)):
      current_delta = delta_array[j]
      max_ls_iters = 1

      for k in range(len(samples_array)):
        current_int_samples = int(samples_array[k])

        print('\n --------------- Trial: ', i+1, ', delta: ', current_delta, ', n_samples:', current_int_samples, ' --------------- \n')

        temp_output_tuple = RGF_Nesterov( x0,
                                          f,
                                          max_iters,
                                          step_size,
                                          int_samples = current_int_samples,
                                          delta = current_delta,
                                          device=device,
                                          print_freq=100)
        xopt                = temp_output_tuple[0]
        fk_hist             = temp_output_tuple[1]
        rel_err_hist        = temp_output_tuple[2]
        grad_norm_hist      = temp_output_tuple[3]


        f_hist_array[i,j,k,:,l] = fk_hist
        rel_err_array[i,j,k,:,l] = rel_err_hist
        grad_norm_hist_array[i,j,k,:,l] = grad_norm_hist

step_size =  1e-06

 --------------- Trial:  1 , delta:  0.0001 , n_samples: 10  --------------- 

iter:  100  fk:  6.75e+01  rel_err:  4.00e+01  grad_norm: 1.05e+04  time =   0.00
iter:  200  fk:  6.98e+01  rel_err:  4.00e+01  grad_norm: 1.62e+04  time =   0.00
iter:  300  fk:  6.79e+01  rel_err:  4.00e+01  grad_norm: 1.10e+04  time =   0.00
iter:  400  fk:  6.67e+01  rel_err:  4.00e+01  grad_norm: 1.17e+04  time =   0.00
iter:  500  fk:  5.93e+01  rel_err:  4.00e+01  grad_norm: 1.22e+04  time =   0.00
iter:  600  fk:  6.12e+01  rel_err:  4.00e+01  grad_norm: 1.08e+04  time =   0.00
iter:  700  fk:  6.21e+01  rel_err:  4.00e+01  grad_norm: 1.19e+04  time =   0.00
iter:  800  fk:  6.21e+01  rel_err:  4.00e+01  grad_norm: 8.41e+03  time =   0.00
iter:  900  fk:  6.05e+01  rel_err:  4.00e+01  grad_norm: 7.49e+03  time =   0.00
iter:  1000  fk:  6.06e+01  rel_err:  4.00e+01  grad_norm: 9.23e+03  time =   0.00
iter:  1100  fk:  6.00e+01  rel_err:  4.00e+01  grad_norm: 1.51e+04  time =   0.

### Save Results

In [10]:
save_dir = 'exp_results/'
data_name = 'nesterov_' + function_name + '_dim' + str(int(dim)) + '.pth'
file_name = save_dir + data_name
state = {
    'f_hist_array': f_hist_array,
    'rel_err_array': rel_err_array,
    'grad_norm_hist_array': grad_norm_hist_array,
    'dim': dim,
    'delta_array': delta_array,
    'samples_array': samples_array,
    'step_size_array': step_size_array,
    'n_trials': n_trials,
}
torch.save(state, file_name)
print('files saved to ' + file_name)

files saved to exp_results/nesterov_weierstrass_function_dim10.pth
