In [52]:
import numpy as np
from scipy.stats import multivariate_normal

Log:

- 11/10 Fixed bugs: H^(-1/2) formula, shapes of theta & epsilons

# Functions to be tested

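This is a test using the negated Rosenbrock function, $F(x, y) = -\left((a - x)^2 + b\,(y - x^2)^2\right)$ with $a = 1$ and $b = 10$, so the optimizers below maximize $F$.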

The global maximum is at (1, 1), with objective value 0.

In [109]:
# Coefficients of the test objective; F reads them as module-level globals.
a = 1
b = 10
def F(theta):
    """Return the negated Rosenbrock-style objective at ``theta``.

    Only ``theta[0]`` (x) and ``theta[1]`` (y) are read. The value is
    ``-((a - x)**2 + b * (y - x**2)**2)``, so larger is better and the
    maximum value 0 is reached at x = a, y = a**2.
    """
    x, y = theta[0], theta[1]
    offset_term = (a - x)**2
    valley_term = b * ((y - x**2)**2)
    return -(offset_term + valley_term)

In [110]:
# Sanity check: at (x, y) = (1, 2) the objective is -(0 + 10 * 1) = -10.
F(np.array([1, 2]))

-10

# Gradient benchmark

This is exactly Algorithm 1 in the RL paper.

In [123]:
def ES_benchmark_gradient(alpha, sigma, theta_0, num_samples, time_steps, objective=None):
    """Plain evolution-strategies gradient ascent.

    Each step draws ``num_samples`` perturbations eps_i ~ N(0, I), evaluates
    the objective at ``theta + sigma * eps_i`` and applies

        theta <- theta + alpha / (n * sigma) * sum_i f_i * eps_i

    Parameters
    ----------
    alpha : float
        Learning rate.
    sigma : float
        Standard deviation of the search perturbations (must be non-zero).
    theta_0 : array_like, shape (d,)
        Starting point. It is copied, never modified.
    num_samples : int
        Number of perturbations per step (n).
    time_steps : int
        Number of update steps.
    objective : callable, optional
        Function to maximize; called with a length-d array and expected to
        return a scalar. Defaults to the notebook-level ``F``.

    Returns
    -------
    (theta, value) : the final parameter vector, shape (d,), and
        ``objective(theta)``.
    """
    if objective is None:
        objective = F
    # Work on a private float copy: the previous in-place `+=` overwrote the
    # caller's theta_0 and raised a casting error for integer arrays.
    theta_t = np.array(theta_0, dtype=float)
    d = theta_t.shape[0]
    n = num_samples
    for _ in range(time_steps):
        # Rows are i.i.d. N(0, I_d) draws (same law as multivariate_normal
        # with identity covariance, without a decomposition per sample).
        eps = np.random.standard_normal((n, d))
        f_vals = np.array(
            [objective(theta_t + sigma * e) for e in eps], dtype=float
        ).reshape(n)
        if n > 0:
            # f_vals @ eps == sum_i f_i * eps_i
            theta_t = theta_t + alpha / (n * sigma) * (f_vals @ eps)
    return theta_t, objective(theta_t)

In [124]:
# Plain ES gradient ascent on F, started from (2, 2).
ES_benchmark_gradient(
    alpha=0.0002,
    sigma=0.1,
    theta_0=np.array([2.0, 2.0]),
    num_samples=50,
    time_steps=10000,
)

(array([1.05175684, 1.12132228]), -0.004967887958809249)

# HessAware Benchmark

This implements the Hess Aware algorithm in Zhang's paper. 

The Hessian estimate comes from Section 4.1.

In [121]:
def ES_benchmark_hess_aware(alpha, sigma, theta_0, num_samples, time_steps, p, H_lambda, objective=None):
    """Hessian-aware ES ascent with a finite-difference Hessian estimate.

    Every ``p`` steps the Hessian estimate is refreshed from the current
    perturbations eps_i ~ N(0, I):

        H = sum_i (f(theta + sigma*eps_i) + f(theta - sigma*eps_i) - 2*f(theta))
                  * eps_i eps_i^T / (2 * sigma**2 * n)  +  H_lambda * I

    Each step then uses H_nh = U diag(s**-0.5) V^T (from the SVD of H) to
    shape the search directions v_i = H_nh @ eps_i and updates

        theta <- theta + alpha * (1/n) * sum_i (f(theta + sigma*v_i) - f(theta)) / sigma * v_i

    Parameters
    ----------
    alpha : float
        Learning rate.
    sigma : float
        Perturbation scale (must be non-zero).
    theta_0 : array_like with d entries
        Starting point; it is copied, never modified.
    num_samples : int
        Perturbations per step (n).
    time_steps : int
        Number of update steps.
    p : int
        The Hessian is re-estimated on steps where ``t % p == 0`` (non-zero).
    H_lambda : float
        Multiple of the identity added to the Hessian estimate.
        NOTE(review): when maximizing, H tends to be negative definite, so a
        positive value moves its eigenvalues toward zero -- confirm the sign.
    objective : callable, optional
        Function to maximize; called with a (d, 1) column. Defaults to the
        notebook-level ``F``. It is assumed deterministic: the probe
        evaluations needed only for the Hessian are skipped on other steps.

    Returns
    -------
    (theta, value, H) : final (d, 1) column, ``objective(theta)`` as returned
        by the objective, and the last Hessian estimate (``None`` when
        ``time_steps`` is 0).
    """
    if objective is None:
        objective = F
    theta_t = np.array(theta_0, dtype=float).reshape(-1, 1)
    d = theta_t.shape[0]
    n = num_samples
    identity = np.identity(d)
    H = None
    H_nh = None
    for t in range(time_steps):
        #**** sample epsilons: one N(0, I_d) draw per row ****#
        eps = np.random.standard_normal((n, d))
        # The centre value is shared by every sample of this step.
        f_center = objective(theta_t)
        #**** compute Hessian (and its inverse square root) every p steps ****#
        if t % p == 0:
            curvature = np.array(
                [
                    objective(theta_t + sigma * e.reshape(d, 1))
                    + objective(theta_t - sigma * e.reshape(d, 1))
                    - 2 * f_center
                    for e in eps
                ],
                dtype=float,
            ).reshape(n)
            # (eps.T * c) @ eps == sum_i c_i * eps_i eps_i^T
            H = (eps.T * curvature) @ eps / (2 * (sigma**2) * n)
            H = H + H_lambda * identity
            # H only changes here, so the SVD is done once per refresh.
            u, s, vh = np.linalg.svd(H)
            H_nh = u @ np.diag(s**-0.5) @ vh
        #**** update theta: compute g ****#
        directions = eps @ H_nh.T  # row i is (H_nh @ eps_i)^T
        gains = np.array(
            [objective(theta_t + sigma * v.reshape(d, 1)) - f_center for v in directions],
            dtype=float,
        ).reshape(n)
        g = (gains @ directions).reshape(d, 1) / (n * sigma)
        #**** update theta: the rest ****#
        theta_t = theta_t + alpha * g

    return theta_t, objective(theta_t), H

In [122]:
# Hessian-aware benchmark on F from (2, 2); Hessian refreshed every 10 steps, no damping.
ES_benchmark_hess_aware(
    alpha=0.5,
    sigma=0.1,
    theta_0=np.array([2.0, 2.0]),
    num_samples=50,
    time_steps=2000,
    p=10,
    H_lambda=0,
)

(array([[0.99470377],
        [0.9892107 ]]),
 array([-2.85558194e-05]),
 array([[-137.46508034,   44.17335996],
        [  44.17335996,  -68.21204881]]))

# First Hessian-based method

We use the Hessian estimate as in the write-up document on Overleaf.

Then, the same Newton's method as in Zhang's paper is used to update theta with the Hessian estimate.

As before, alpha is the learning rate. The parameter p defines how often we re-compute the Hessian.

In [125]:
def ES_hessian(alpha, sigma, theta_0, num_samples, time_steps, p, H_lambda, objective=None):
    """Hessian-aware ES ascent using a single-evaluation Hessian estimate.

    Every ``p`` steps the Hessian estimate is refreshed from the current
    perturbations eps_i ~ N(0, I):

        H = sum_i f(theta + sigma*eps_i) * (eps_i eps_i^T - I) / (sigma**2 * n)
            + H_lambda * I

    Each step then uses H_nh = U diag(s**-0.5) V^T (from the SVD of H) to
    shape the search directions v_i = H_nh @ eps_i and updates

        theta <- theta + alpha * (1/n) * sum_i (f(theta + sigma*v_i) - f(theta)) / sigma * v_i

    NOTE(review): the estimate weights (eps eps^T - I) by the raw objective
    value with no baseline, so its noise scales with |f(theta)| / sigma**2;
    the recorded run of this function in the notebook diverged. Confirm
    against the write-up whether a baseline is intended.

    Parameters
    ----------
    alpha : float
        Learning rate.
    sigma : float
        Perturbation scale (must be non-zero).
    theta_0 : array_like with d entries
        Starting point; it is copied, never modified.
    num_samples : int
        Perturbations per step (n).
    time_steps : int
        Number of update steps.
    p : int
        The Hessian is re-estimated on steps where ``t % p == 0`` (non-zero).
    H_lambda : float
        Multiple of the identity added to the Hessian estimate.
        NOTE(review): when maximizing, H tends to be negative definite, so a
        positive value moves its eigenvalues toward zero -- confirm the sign.
    objective : callable, optional
        Function to maximize; called with a (d, 1) column. Defaults to the
        notebook-level ``F``. It is assumed deterministic: the probe
        evaluations needed only for the Hessian are skipped on other steps.

    Returns
    -------
    (theta, value, H) : final (d, 1) column, ``objective(theta)`` as returned
        by the objective, and the last Hessian estimate (``None`` when
        ``time_steps`` is 0).
    """
    if objective is None:
        objective = F
    theta_t = np.array(theta_0, dtype=float).reshape(-1, 1)
    d = theta_t.shape[0]
    n = num_samples
    identity = np.identity(d)
    H = None
    H_nh = None
    for t in range(time_steps):
        #**** sample epsilons: one N(0, I_d) draw per row ****#
        eps = np.random.standard_normal((n, d))
        #**** compute Hessian (and its inverse square root) every p steps ****#
        if t % p == 0:
            # These evaluations feed only the Hessian estimate.
            f_vals = np.array(
                [objective(theta_t + sigma * e.reshape(d, 1)) for e in eps],
                dtype=float,
            ).reshape(n)
            # sum_i f_i * (eps_i eps_i^T - I), written without a Python loop
            H = ((eps.T * f_vals) @ eps - f_vals.sum() * identity) / ((sigma**2) * n)
            H = H + H_lambda * identity
            # H only changes here, so the SVD is done once per refresh.
            u, s, vh = np.linalg.svd(H)
            H_nh = u @ np.diag(s**-0.5) @ vh
        #**** update theta: compute g ****#
        f_center = objective(theta_t)  # shared by every sample of this step
        directions = eps @ H_nh.T  # row i is (H_nh @ eps_i)^T
        gains = np.array(
            [objective(theta_t + sigma * v.reshape(d, 1)) - f_center for v in directions],
            dtype=float,
        ).reshape(n)
        g = (gains @ directions).reshape(d, 1) / (n * sigma)
        #**** update theta: the rest ****#
        theta_t = theta_t + alpha * g

    return theta_t, objective(theta_t), H

In [130]:
# First Hessian-based method on F from (2, 2); Hessian refreshed every 10 steps.
# NOTE: the recorded output of this run diverged (|theta| ~ 1e26, F ~ -1e107).
ES_hessian(
    alpha=0.1,
    sigma=0.1,
    theta_0=np.array([2.0, 2.0]),
    num_samples=50,
    time_steps=5000,
    p=10,
    H_lambda=1e-4,
)

(array([[3.6482616e+26],
        [1.5762020e+27]]),
 array([-1.77151114e+107]),
 array([[-5.10086008e+107, -8.18841120e+107],
        [-8.18841120e+107, -5.62896704e+108]]))