In [110]:
import numpy as np
import nevergrad as ng
from scipy.stats import multivariate_normal

# Functions to be tested

For now, use a very simple function - quadratic with max value 0, and theta* = [0.5, 0.5] 

Also note that we will maximize the function, not minimize.

In [111]:
def F(theta):
    return -sum((theta - 0.5) ** 2)

# Gradient benchmark

This is exactly the Algorithm 1 in the RL paper.

In [117]:
def ES_benchmark_gradient(alpha, sigma, theta_0, num_samples, time_steps):
    theta_t = theta_0
    d = theta_0.shape[0]
    n = num_samples
    for t in range(time_steps):
        #**** sample epsilons ****#
        eps_list = [] 
        for i in range(n):
            eps = np.random.multivariate_normal(mean = np.zeros(d), cov = np.identity(d))
            eps_list.append(eps)
        #**** compute function values ****#
        F_list = []
        for i in range(n):
            F_val = F(theta_t + sigma*eps_list[i])
            F_list.append(F_val)
        #**** update theta ****#
        new_theta = theta_t
        for i in range(n):
            new_theta += alpha / (n*sigma) * F_list[i] * eps_list[i]
        theta_t = new_theta
    return theta_t, F(theta_t)

In [120]:
ES_benchmark_gradient(alpha=0.001, sigma=0.01, theta_0 = np.array([1.0,1.0]), num_samples = 10, time_steps = 1000)

(array([0.58225739, 0.55440925]), -0.009726644536737042)

# First Hessian-based method

We use the Hessian estimate as in the write-up document on Overleaf.

Then, the same Newton's method as in Zhang's paper is used to update theta with the Hessian estimate.

As before, alpha is the learning rate. The parameter p defines how often we re-compute the Hessian.

In [122]:
def ES_hessian(alpha, sigma, theta_0, num_samples, time_steps, p):
    theta_t = theta_0
    d = theta_0.shape[0]
    n = num_samples
    H = None
    for t in range(time_steps):
        #**** sample epsilons ****#
        eps_list = [] 
        for i in range(n):
            eps = np.random.multivariate_normal(mean = np.zeros(d), cov = np.identity(d))
            eps_list.append(eps)
        #**** compute function values ****#
        F_list = []
        for i in range(n):
            F_val = F(theta_t + sigma*eps_list[i])
            F_list.append(F_val)
        #**** compute Hessian every p steps ****#
        if t % p == 0:
            H = np.zeros((d,d))
            for i in range(n):
                e_i = eps_list[i].reshape((d,1))
                e_i_trans = eps_list[i].reshape((1,d))
                H += F_list[i] * (np.matmul(e_i, e_i_trans) - np.identity(d) ) / n
            H /= sigma**2
        #**** update theta: compute g ****#
        u, s, vh = np.linalg.svd(H)
        H_nh = u @ np.diag(s**-0.5)
        g = 0
        for i in range(n):
            e_i = eps_list[i].reshape((d,1)) 
            F_new = F(theta_t +  sigma* (H_nh @ e_i)  )
            g += ((F_new - F(theta_t)) / sigma ) @ (H_nh @ e_i) / n
        #**** update theta: the rest ****#
        new_theta = theta_t + alpha * g
        theta_t = new_theta
        
    return theta_t, F(theta_t), H

In [133]:
ES_hessian(alpha=0.005, sigma=0.01, theta_0 = np.array([1.0,1.0]), num_samples = 50, time_steps = 5000, p = 10)

(array([0.50031513, 0.50031513]),
 -1.986128335916654e-07,
 array([[-0.50137429, -0.18191992],
        [-0.18191992, -2.4581628 ]]))