/
customcg.py
256 lines (212 loc) · 10.1 KB
/
customcg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
""" A custom conjugate gradient descent algorithm """
#***************************************************************************************************
# Copyright 2015, 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
# Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains certain rights
# in this software.
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0 or in the LICENSE file in the root pyGSTi directory.
#***************************************************************************************************
import numpy as _np
try:
from scipy.optimize import Result as _optResult # for earlier scipy versions
except:
from scipy.optimize import OptimizeResult as _optResult # for later scipy versions
def fmax_cg(f, x0, maxiters=100, tol=1e-8, dfdx_and_bdflag=None, xopt=None):
"""
Custom conjugate-gradient (CG) routine for maximizing a function.
This function runs slower than scipy.optimize's 'CG' method, but doesn't
give up or get stuck as easily, and so sometimes can be a better option.
Parameters
----------
fn : function
The function to minimize.
x0 : numpy array
The starting point (argument to fn).
maxiters : int, optional
Maximum iterations.
tol : float, optional
Tolerace for convergence (compared to absolute difference in f)
dfdx_and_bdflag : function, optional
Function to compute jacobian of f as well as a boundary-flag.
xopt : numpy array, optional
Used for debugging, output can be printed relating current optimum
relative xopt, assumed to be a known good optimum.
Returns
-------
scipy.optimize.Result object
Includes members 'x', 'fun', 'success', and 'message'. **Note:** returns
the negated maximum in 'fun' in order to conform to the return value of
other minimization routines.
"""
MIN_STEPSIZE = 1e-8
FINITE_DIFF_STEP = 1e-4
RESET = 5
stepsize = 1e-6
#if no dfdx specifed, use finite differences
if dfdx_and_bdflag is None:
def dfdx_and_bdflag(x): return _finite_diff_dfdx_and_bdflag(f, x, FINITE_DIFF_STEP)
step = 0
x = x0; last_fx = f(x0); last_x = x0
lastchange = 0
lastgradnorm = 0.0 # Safer than relying on uninitialized variables
lastgrad = 0.0
if last_fx is None: raise ValueError("fmax_cg was started out of bounds!")
while(step < maxiters and ((stepsize > MIN_STEPSIZE) or (step % RESET != 1 and RESET > 1))):
grad, boundaryFlag = dfdx_and_bdflag(x)
gradnorm = _np.dot(grad, grad)
if step % RESET == 0: # reset change == gradient
change = grad[:]
else: # add gradient to change (conjugate gradient)
#beta = gradnorm / lastgradnorm # Fletcher-Reeves
beta = (gradnorm - _np.dot(grad, lastgrad)) / lastgradnorm # Polak-Ribiere
#beta = (gradnorm - _np.dot(grad,lastgrad))/(_np.dot(lastchange,grad-lastgrad)) #Hestenes-Stiefel
change = grad + beta * lastchange
for i in range(len(change)):
if boundaryFlag[i] * change[i] > 0:
change[i] = 0
print("DEBUG: fmax Preventing motion along dim %s" % i)
if max(abs(change)) == 0:
print("Warning: Completely Boxed in!")
fx = last_fx; x = last_x
assert(abs(last_fx - f(last_x)) < 1e-6)
break
#i = list(abs(grad)).index(min(abs(grad)))
#change[i] = -boundaryFlag[i] * 1.0 # could pick a random direction to move in?
#gradnorm = 1.0 # punt...
lastgrad = grad
lastgradnorm = gradnorm
multiplier = 1.0 / max(abs(change))
change *= multiplier
# Now "change" has largest element 1. Time to do a linear search to find optimal stepsize.
# If the last step had crazy short length, reset stepsize
if stepsize < MIN_STEPSIZE: stepsize = MIN_STEPSIZE
def g(s): return f(x + s * change) # f along a line given by changedir. Argument to function is stepsize.
stepsize = _maximize1D(g, 0, abs(stepsize), last_fx) # find optimal stepsize along change direction
predicted_difference = stepsize * _np.dot(grad, change)
if xopt is not None: xopt_dot = _np.dot(change, xopt - x) / \
(_np.linalg.norm(change) * _np.linalg.norm(xopt - x))
x += stepsize * change; fx = f(x)
difference = fx - last_fx
print("DEBUG: Max iter ", step, ": f=", fx, ", dexpct=", predicted_difference - difference,
", step=", stepsize, ", xopt_dot=", xopt_dot if xopt is not None else "--",
", chg_dot=",
_np.dot(change, lastchange) / (_np.linalg.norm(change) * _np.linalg.norm(lastchange) + 1e-6))
if abs(difference) < tol: break # Convergence condition
lastchange = change
last_fx = fx
last_x = x.copy()
step += 1
print("Finished Custom Contrained Newton CG Method")
print(" iterations = %d" % step)
print(" maximum f = %g" % fx)
solution = _optResult()
# negate maximum to conform to other minimization routines
solution.x = x; solution.fun = -fx if fx is not None else None
if step < maxiters:
solution.success = True
else:
solution.success = False
solution.message = "Maximum iterations exceeded"
return solution
# Minimize g(s), given (s1,g1=g(s1)) as a starting point and guess, s2 for minimum
def _maximize1D(g, s1, s2, g1):
PHI = (1.0 + _np.sqrt(5.0)) / 2 # golden ratio
TOL = 1e-10; FRAC_TOL = 1e-6
# Note (s1,g1) and s2 are given. Start with bracket (s1,s2,s3)
#s3 = s2*(1.0+PHI); g2 = g(s2); g3 = g(s3)
s3 = s2 + PHI * (s2 - s1); g2 = g(s2); g3 = g(s3)
# s4,g4 = None,None
s1_on_bd = s3_on_bd = False
#print "DEBUG: BEGIN MAX 1D: s1,s3=", (s1,s3)
assert(g1 is not None or g3 is not None)
if g1 is None or g3 is None:
if g1 is None: s1, g1 = _findBoundary(g, s3, s1); s1_on_bd = True
if g3 is None: s3, g3 = _findBoundary(g, s1, s3); s3_on_bd = True
s2 = s1 + (s3 - s1) / PHI; g2 = g(s2)
while((abs(s3 - s1) > TOL) and (abs(s3 - s1) > FRAC_TOL * (abs(s3) + abs(s1)))):
#print "DEBUG: Max1D iter: (",s1,",",g1,") (",s2,",",g2,") (",s3,",",g3,")"
if g3 > g2:
if g2 >= g1: # Expand to the right
if s3_on_bd: print("** Returning on bd"); return s3 # can't expand any further to right
s2, g2 = s3, g3
s3 = s1 + (s3 - s1) * PHI; g3 = g(s3)
if g3 is None:
s3, g3 = _findBoundary(g, s2, s3)
s3_on_bd = True
else: # contract to the left.
s3, g3 = s2, g2
s2 = s1 + (s3 - s1) / PHI; g2 = g(s2)
else:
if g2 <= g1: # Expand to the left
if s1_on_bd: print("** Returning on bd2"); return s1 # can't expand any further to left
s2, g2 = s1, g1
s1 = s3 - (s3 - s1) * PHI; g1 = g(s1)
if g1 is None:
s1, g1 = _findBoundary(g, s2, s1)
s1_on_bd = True
else: # Got it bracketed: now just narrow down bracket
return _max_within_bracket(g, s1, g1, s2, g2, s3, g3)
print("Warning: maximize_1d could not find bracket")
ret = s2 if g2 is not None else s1 # return s2 if it evaluates to a valid point
assert(g(ret) is not None) # otherwise return s1, since it should always be valid
return ret
#TODO: g3 and g1 are unused!
def _max_within_bracket(g, s1, g1, s2, g2, s3, g3):
TOL = 1e-10; FRAC_TOL = 1e-6
assert(s2 - s1 > TOL) # a legit bracket must have s1 < s2
while((abs(s3 - s1) > TOL) and (abs(s3 - s1) > FRAC_TOL * (abs(s3) + abs(s1)))):
s4 = s1 + (s3 - s2); g4 = g(s4)
#print "DEBUG: Max in brk iter: (",s1,",",g1,") (",s2,",",g2,") (",s3,",",g3,") (",s4,",",g4,")"
assert(g4 is not None) # assume function is defined at all points in bracket
if s4 > s2:
if(g4 > g2): # Drop x1 (move to x2), move x2 to x4.
s1, _ = s2, g2
s2, g2 = s4, g4
else: # Drop x3 (move to x4)
s3, _ = s4, g4
else:
if g4 > g2: # Drop x3 (move to x2), move x2 to x4
s3, _ = s2, g2
s2, g2 = s4, g4
else: # Drop x1 (move to x4)
s1, _ = s4, g4
return s2
#find boundary of g (i.e. at the edge of where it is defined)
# g(s1) must be defined (not None) and g(s2) must == None (function undefined)
def _findBoundary(g, s1, s2):
#print "DEBUG: finding bd fn"
TOL = 1e-6
while(abs(s1 - s2) > TOL): # just do binary search
m = (s1 + s2) / 2.0; gm = g(m)
if gm is None: s2 = m
else: s1 = m
return s1, g(s1)
#provide finite difference derivatives with boundary for a given function f. Boundaries are
# determined by the function f returning a None value when it is not defined.
def _finite_diff_dfdx_and_bdflag(f, x, DELTA):
x = x.copy() # make sure x is unaltered
N = len(x)
dfdx = _np.zeros(N) # complex?
bd = _np.zeros(N)
for k in range(N):
x[k] += DELTA; fPlus = f(x)
x[k] -= 2 * DELTA; fMinus = f(x)
x[k] += DELTA
if fPlus is None: bd[k] = +1.0
elif fMinus is None: bd[k] = -1.0
else: dfdx[k] = (fPlus - fMinus) / (2 * DELTA)
#assert(fPlus is not None or fMinus is not None) #make sure we don't evaluate f somewhere it's
#completely undefined
return dfdx, bd
#def f6(param):
# '''Schaffer's F6 function'''
# para = param*10
# para = param[0:2]
# num = (sin(sqrt((para[0] * para[0]) + (para[1] * para[1])))) * \
# (sin(sqrt((para[0] * para[0]) + (para[1] * para[1])))) - 0.5
# denom = (1.0 + 0.001 * ((para[0] * para[0]) + (para[1] * para[1]))) * \
# (1.0 + 0.001 * ((para[0] * para[0]) + (para[1] * para[1])))
# f6 = 0.5 - (num/denom)
# errorf6 = 1 - f6
# return f6, errorf6;