Use this notebook to evaluate your optimization methods against simple optimization problems, then compare your results with other optimizers.

# Optimizer benchmark setup

Here you set up the optimization problem and your optimizer

## Optimization problems

In [7]:
import numpy as np
from scipy.optimize import minimize
import time
import policy_grad_optimizer

"""
1D problem.
Global min at w[0]=1, f(w)=0
Initial condition = 2
"""
one_d_problem = {
  'init_conds': [2.0],
  'obj_function': lambda w: (w[0]-1)**2
}

"""
2D problem.
Global min at w[0]=1, w[1]= 2, f(w)=0
Initial condition = 2
"""
two_d_problem = {
  'init_conds': [2.0, 1.0],
  'obj_function': lambda w: (w[0]-1)**2 + (w[1]-2)**2
}

"""
9D problem.
Global min at w[i] = i+1, f(x)=0
Initial condition = 2
"""
def nine_d_obj_fun(w):
  res = 0 
  for i in range(9):
    res += (w[i] - (i+1)) ** 2
  return res
nine_d_problem = {
  'init_conds': [9.0 - i for i in range(9)],
  'obj_function': nine_d_obj_fun
}



## Optimizers

In [8]:
# Optimizer 1: Scipy
def scipy_trust_constr(obj_fun, init_conds):
  res = minimize(obj_fun, init_conds, method='trust-constr', options={'maxiter':100})
  print(res)
  return res.x

# Optimizer 2: Policy gradient with w and f(w) historical info
policy_grad_optimizer.optimize_with_w_and_dfw

<function policy_grad_optimizer.optimize_with_w_and_dfw(f, w0)>

## Final benchmark parameters

In [9]:
# optimizer. A function that accepts a scalar function f(w) and the initial condition
# and will return argmin w f(w) 
OPTIMIZER = policy_grad_optimizer.optimize_with_w_and_dfw
PROBLEM_SPEC = one_d_problem
NUM_RUNS = 10

# Run the benchmark (This might take a while)

In [10]:
runtimes = []
fws = []
ws = []
for run_i in range(NUM_RUNS):
  # Use a different seed each run. We will get reproducible results, and still allow each optimizer
  # to run with different seeds.
  np.random.seed(run_i)
  start_time_s = time.time()
  f = PROBLEM_SPEC['obj_function']
  w = OPTIMIZER(f, PROBLEM_SPEC['init_conds'])
  elapsed_time_s = time.time() - start_time_s
  fw = f(w)
  runtimes.append(elapsed_time_s)
  fws.append(fw)
  ws.append(w)

  print("Run %d / %d. f(w) = %.2f. Runtime (s)= %.2f. w = %s." % (run_i+1, NUM_RUNS, fw, elapsed_time_s, w))

print("All f(w)'s = %s" % np.sort(fws))
print("All w's = %s" % ws)
print("Average runtime (s) = %.2f" % np.mean(elapsed_time_s))
print("Min f(w) = %.2f" % np.min(fws))
print("Mean f(w) = %.2f" % np.mean(fws))
print("Median f(w) = %.2f" % np.median(fws))
print("Best w = %s" % (ws[np.argmin(fws)]))

Run 1 / 10. f(w) = 0.04. Runtime (s)= 1.36. w = [0.8].
Run 2 / 10. f(w) = 26.01. Runtime (s)= 1.30. w = [6.1].
Run 3 / 10. f(w) = 445.21. Runtime (s)= 1.24. w = [22.1].
Run 4 / 10. f(w) = 0.09. Runtime (s)= 1.40. w = [0.7].
Run 5 / 10. f(w) = 0.09. Runtime (s)= 1.19. w = [0.7].
Run 6 / 10. f(w) = 5.29. Runtime (s)= 1.42. w = [-1.3].
Run 7 / 10. f(w) = 1616.04. Runtime (s)= 1.24. w = [41.2].
Run 8 / 10. f(w) = 823.69. Runtime (s)= 1.34. w = [29.7].
Run 9 / 10. f(w) = 640.09. Runtime (s)= 1.08. w = [26.3].
Run 10 / 10. f(w) = 0.09. Runtime (s)= 1.36. w = [0.7].
All f(w)'s = [4.00000e-02 9.00000e-02 9.00000e-02 9.00000e-02 5.29000e+00 2.60100e+01
 4.45210e+02 6.40090e+02 8.23690e+02 1.61604e+03]
All w's = [array([0.8]), array([6.1]), array([22.1]), array([0.7]), array([0.7]), array([-1.3]), array([41.2]), array([29.7]), array([26.3]), array([0.7])]
Average runtime (s) = 1.36
Min f(w) = 0.04
Mean f(w) = 355.66
Median f(w) = 15.65
Best w = [0.8]
