ENH: optimize: refactor line searches to scalar search + line wrapper; move to linesearch.py

Each of the existing line search functions

    f(x + s*p) -> suitable minimum

is split to a scalar search function

    phi(s) -> suitable minimum

and a wrapper

    phi_p(s) = f(x + s*p)

that makes it a line search.
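Condensed, the wrapper pattern looks like this (a sketch only; the real
`line_search_wolfe1` below also threads through evaluation counters,
extra arguments, and finite-difference gradients):

    import numpy as np
    from scipy.optimize.linesearch import scalar_search_wolfe1

    def line_search(f, fprime, xk, pk, **kw):
        phi = lambda s: f(xk + s*pk)                      # phi_p(s)
        derphi = lambda s: np.dot(fprime(xk + s*pk), pk)  # phi_p'(s)
        return scalar_search_wolfe1(phi, derphi, **kw)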

This refactoring makes the logic in the functions slightly clearer, and
makes reusing them in large-scale non-linear equation solvers (which
only need 1-d searches) more straightforward.
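For example, a solver that only needs a 1-d search can now call the
scalar routine directly, while n-d callers keep the line-search
interface (an illustrative sketch; the toy objectives are made up, the
names and signatures are those added in linesearch.py below):

    import numpy as np
    from scipy.optimize.linesearch import (line_search_armijo,
                                           scalar_search_armijo)

    # 1-d use, e.g. inside a nonlinear equation solver
    phi = lambda s: (s - 1.0)**2                  # toy merit function
    alpha, phi1 = scalar_search_armijo(phi, phi0=phi(0.), derphi0=-2.0)

    # n-d use via the wrapper; phi(s) = f(xk + s*pk) is built internally
    f = lambda x: np.dot(x, x)
    xk = np.array([1.0, 2.0])
    pk = -2.0*xk                                  # descent direction (-grad f)
    alpha, fc, fval = line_search_armijo(f, xk, pk, gfk=2.0*xk,
                                         old_fval=f(xk))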

New tests for the line searches are also added. They check that the
returned step actually satisfies the promised conditions (strong Wolfe
or Armijo) on a couple of test problems.
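In sketch form, the property asserted for a step `s` returned by the
Wolfe searches is the following (cf. `assert_wolfe` in
test_linesearch.py below, whose `c1`/`c2` defaults these match):

    def satisfies_strong_wolfe(phi, derphi, s, c1=1e-4, c2=0.9):
        wolfe1 = phi(s) <= phi(0) + c1*s*derphi(0)    # sufficient decrease
        wolfe2 = abs(derphi(s)) <= c2*abs(derphi(0))  # curvature condition
        return wolfe1 and wolfe2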
commit fefef2d73200d535b95ce0f21dcfe122301a967d (1 parent: 308535a)
authored by @pv
scipy/optimize/linesearch.py (608 lines changed)
@@ -1,53 +1,593 @@
-
from scipy.optimize import minpack2
-import numpy
+import numpy as np
+
+__all__ = ['line_search_wolfe1', 'line_search_wolfe2',
+ 'scalar_search_wolfe1', 'scalar_search_wolfe2',
+ 'line_search_armijo']
+
+#------------------------------------------------------------------------------
+# Minpack's Wolfe line and scalar searches
+#------------------------------------------------------------------------------
+
+def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
+ old_fval=None, old_old_fval=None,
+ args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
+ xtol=1e-14):
+ """
+ As `scalar_search_wolfe1`, but do a line search in the direction `pk`.
-import __builtin__
-pymin = __builtin__.min
+ Parameters
+ ----------
+ f : callable
+ Function `f(x)`
+ fprime : callable
+ Gradient of `f`
+ xk : array-like
+ Current point
+ pk : array-like
+ Search direction
-def line_search(f, myfprime, xk, pk, gfk, old_fval, old_old_fval,
- args=(), c1=1e-4, c2=0.9, amax=50):
+ gfk : array-like, optional
+ Gradient of `f` at point `xk`
+ old_fval : float, optional
+ Value of `f` at point `xk`
+ old_old_fval : float, optional
+ Value of `f` at point preceding `xk`
- fc = 0
- gc = 0
- phi0 = old_fval
- derphi0 = numpy.dot(gfk,pk)
- alpha1 = pymin(1.0,1.01*2*(phi0-old_old_fval)/derphi0)
+ The rest of the parameters are the same as for `scalar_search_wolfe1`.
- if isinstance(myfprime,type(())):
- eps = myfprime[1]
- fprime = myfprime[0]
- newargs = (f,eps) + args
+ Returns
+ -------
+ stp : float
+ Step size found, or None if the line search failed
+ f_count, g_count : int
+ Number of function and gradient evaluations made
+ fval, old_fval : float
+ Value of `f` at the new point, and at the starting point `xk`
+ gval : array
+ Gradient of `f` at the final point
+
+ """
+ if gfk is None:
+ gfk = fprime(xk)
+
+ if isinstance(fprime, tuple):
+ eps = fprime[1]
+ fprime = fprime[0]
+ newargs = (f, eps) + args
gradient = False
else:
- fprime = myfprime
newargs = args
gradient = True
- xtol = 1e-14
- amin = 1e-8
- isave = numpy.zeros((2,), numpy.intc)
- dsave = numpy.zeros((13,), float)
+ gval = [gfk]
+ gc = [0]
+ fc = [0]
+
+ def phi(s):
+ fc[0] += 1
+ return f(xk + s*pk, *args)
+
+ def derphi(s):
+ gval[0] = fprime(xk + s*pk, *newargs)
+ if gradient:
+ gc[0] += 1
+ else:
+ fc[0] += len(xk) + 1
+ return np.dot(gval[0], pk)
+
+ derphi0 = np.dot(gfk, pk)
+
+ stp, fval, old_fval = scalar_search_wolfe1(
+ phi, derphi, old_fval, old_old_fval, derphi0,
+ c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol)
+
+ return stp, fc[0], gc[0], fval, old_fval, gval[0]
+
+
+def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
+ c1=1e-4, c2=0.9,
+ amax=50, amin=1e-8, xtol=1e-14):
+ """
+ Scalar function search for alpha that satisfies the strong Wolfe conditions.
+
+ alpha > 0 is assumed to be a descent direction, i.e. `derphi(0) < 0`.
+
+ Parameters
+ ----------
+ phi : callable phi(alpha)
+ Function at point `alpha`
+ derphi : callable dphi(alpha)
+ Derivative `d phi(alpha)/d alpha`. Returns a scalar.
+
+ phi0 : float, optional
+ Value of `phi` at 0
+ old_phi0 : float, optional
+ Value of `phi` at the previous point
+ derphi0 : float, optional
+ Value of `derphi` at 0
+ amax : float, optional
+ Maximum step size
+ c1, c2 : float, optional
+ Wolfe parameters
+
+ Returns
+ -------
+ alpha : float
+ Step size, or None if no suitable step was found
+ phi1 : float
+ Value of `phi` at the new point `alpha`
+ phi0 : float
+ Value of `phi` at `alpha=0`
+
+ Notes
+ -----
+ Uses routine DCSRCH from MINPACK.
+
+ """
+
+ if phi0 is None:
+ phi0 = phi(0.)
+ if derphi0 is None:
+ derphi0 = derphi(0.)
+
+ if old_phi0 is not None:
+ alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
+ if alpha1 < 0:
+ alpha1 = 1.0
+ else:
+ alpha1 = 1.0
+
+ phi1 = phi0
+ derphi1 = derphi0
+ isave = np.zeros((2,), np.intc)
+ dsave = np.zeros((13,), float)
task = 'START'
- fval = old_fval
- gval = gfk
while 1:
- stp,fval,derphi,task = minpack2.dcsrch(alpha1, phi0, derphi0, c1, c2,
- xtol, task, amin, amax,isave,dsave)
-
+ stp, phi1, derphi1, task = minpack2.dcsrch(alpha1, phi1, derphi1,
+ c1, c2, xtol, task,
+ amin, amax, isave, dsave)
if task[:2] == 'FG':
alpha1 = stp
- fval = f(xk+stp*pk,*args)
- fc += 1
- gval = fprime(xk+stp*pk,*newargs)
- if gradient: gc += 1
- else: fc += len(xk) + 1
- phi0 = fval
- derphi0 = numpy.dot(gval,pk)
+ phi1 = phi(stp)
+ derphi1 = derphi(stp)
else:
break
- if task[:5] == 'ERROR' or task[1:4] == 'WARN':
+ if task[:5] == 'ERROR':
stp = None # failed
- return stp, fc, gc, fval, old_fval, gval
+
+ return stp, phi1, phi0
+
+line_search = line_search_wolfe1
+
+#------------------------------------------------------------------------------
+# Pure-Python Wolfe line and scalar searches
+#------------------------------------------------------------------------------
+
+def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
+ old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=50):
+ """Find alpha that satisfies strong Wolfe conditions.
+
+ Parameters
+ ----------
+ f : callable f(x,*args)
+ Objective function.
+ myfprime : callable f'(x,*args)
+ Objective function gradient (can be None).
+ xk : ndarray
+ Starting point.
+ pk : ndarray
+ Search direction.
+ gfk : ndarray, optional
+ Gradient value for x=xk (xk being the current parameter
+ estimate). Will be recomputed if omitted.
+ old_fval : float, optional
+ Function value for x=xk. Will be recomputed if omitted.
+ old_old_fval : float, optional
+ Function value for the point preceding x=xk
+ args : tuple, optional
+ Additional arguments passed to objective function.
+ c1 : float, optional
+ Parameter for Armijo condition rule.
+ c2 : float, optional
+ Parameter for curvature condition rule.
+
+ Returns
+ -------
+ alpha0 : float
+ Alpha for which ``x_new = x0 + alpha * pk``.
+ fc : int
+ Number of function evaluations made.
+ gc : int
+ Number of gradient evaluations made.
+
+ Notes
+ -----
+ Uses the line search algorithm to enforce strong Wolfe
+ conditions. See Wright and Nocedal, 'Numerical Optimization',
+ 1999, pg. 59-60.
+
+ For the zoom phase it uses an algorithm by [...].
+
+ """
+ fc = [0]
+ gc = [0]
+ gval = [None]
+
+ def phi(alpha):
+ fc[0] += 1
+ return f(xk + alpha * pk, *args)
+
+ if isinstance(myfprime, tuple):
+ def derphi(alpha):
+ fc[0] += len(xk)+1
+ eps = myfprime[1]
+ fprime = myfprime[0]
+ newargs = (f,eps) + args
+ gval[0] = fprime(xk+alpha*pk, *newargs) # store for later use
+ return np.dot(gval[0], pk)
+ else:
+ fprime = myfprime
+ def derphi(alpha):
+ gc[0] += 1
+ gval[0] = fprime(xk+alpha*pk, *args) # store for later use
+ return np.dot(gval[0], pk)
+
+ if gfk is None:
+ # per the docstring, recompute the gradient if it was omitted
+ derphi0 = derphi(0.)
+ else:
+ derphi0 = np.dot(gfk, pk)
+
+ alpha_star, phi_star, old_fval, derphi_star = \
+ scalar_search_wolfe2(phi, derphi, old_fval, old_old_fval,
+ derphi0, c1, c2, amax)
+
+ if derphi_star is not None:
+ # derphi_star here is a scalar (the value of derphi at the solution),
+ # so return instead the most recently computed gradient: it is the
+ # gradient at the new point, and need not be recomputed in the
+ # outer loop.
+ derphi_star = gval[0]
+
+ return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star
+
+
+def scalar_search_wolfe2(phi, derphi=None, phi0=None,
+ old_phi0=None, derphi0=None,
+ c1=1e-4, c2=0.9, amax=50):
+ """Find alpha that satisfies strong Wolfe conditions.
+
+ alpha > 0 is assumed to be a descent direction, i.e. `derphi(0) < 0`.
+
+ Parameters
+ ----------
+ phi : callable phi(alpha)
+ Objective scalar function.
+
+ derphi : callable phi'(alpha), optional
+ Derivative of the objective function (can be None)
+ phi0 : float, optional
+ Value of phi at alpha=0
+ old_phi0 : float, optional
+ Value of phi at the previous point
+ derphi0 : float, optional
+ Value of derphi at alpha=0
+ c1 : float
+ Parameter for Armijo condition rule.
+ c2 : float
+ Parameter for curvature condition rule.
+
+ Returns
+ -------
+ alpha_star : float
+ Best alpha, or None if the line search did not converge
+ phi_star : float
+ phi at alpha_star
+ phi0 : float
+ phi at 0
+ derphi_star : float
+ derphi at alpha_star, or None if it was not evaluated
+
+ Notes
+ -----
+ Uses the line search algorithm to enforce strong Wolfe
+ conditions. See Wright and Nocedal, 'Numerical Optimization',
+ 1999, pg. 59-60.
+
+ For the zoom phase it uses an algorithm by [...].
+
+ """
+
+ if phi0 is None:
+ phi0 = phi(0.)
+
+ if derphi0 is None and derphi is not None:
+ derphi0 = derphi(0.)
+
+ alpha0 = 0
+ if old_phi0 is not None:
+ alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
+ else:
+ alpha1 = 1.0
+
+ if alpha1 < 0:
+ alpha1 = 1.0
+
+ if alpha1 == 0:
+ # This shouldn't happen. Perhaps the increment has slipped below
+ # machine precision? For now, set the return variables, skip the
+ # useless while loop, and signal failure by returning alpha_star = None.
+ alpha_star = None
+ phi_star = phi0
+ phi0 = old_phi0
+ derphi_star = None
+
+ phi_a1 = phi(alpha1)
+ #derphi_a1 = derphi(alpha1) evaluated below
+
+ phi_a0 = phi0
+ derphi_a0 = derphi0
+
+ i = 1
+ maxiter = 10
+ while 1: # bracketing phase
+ if alpha1 == 0:
+ break
+ if (phi_a1 > phi0 + c1*alpha1*derphi0) or \
+ ((phi_a1 >= phi_a0) and (i > 1)):
+ alpha_star, phi_star, derphi_star = \
+ _zoom(alpha0, alpha1, phi_a0,
+ phi_a1, derphi_a0, phi, derphi,
+ phi0, derphi0, c1, c2)
+ break
+
+ derphi_a1 = derphi(alpha1)
+ if (abs(derphi_a1) <= -c2*derphi0):
+ alpha_star = alpha1
+ phi_star = phi_a1
+ derphi_star = derphi_a1
+ break
+
+ if (derphi_a1 >= 0):
+ alpha_star, phi_star, derphi_star = \
+ _zoom(alpha1, alpha0, phi_a1,
+ phi_a0, derphi_a1, phi, derphi,
+ phi0, derphi0, c1, c2)
+ break
+
+ alpha2 = 2 * alpha1 # increase by factor of two on each iteration
+ i = i + 1
+ alpha0 = alpha1
+ alpha1 = alpha2
+ phi_a0 = phi_a1
+ phi_a1 = phi(alpha1)
+ derphi_a0 = derphi_a1
+
+ # stopping test: no suitable step found within maxiter iterations
+ if i > maxiter:
+ alpha_star = alpha1
+ phi_star = phi_a1
+ derphi_star = None
+ break
+
+ return alpha_star, phi_star, phi0, derphi_star
+
+
+def _cubicmin(a,fa,fpa,b,fb,c,fc):
+ """
+ Finds the minimizer for a cubic polynomial that goes through the
+ points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa.
+
+ If no minimizer can be found return None
+
+ """
+ # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
+
+ C = fpa
+ D = fa
+ db = b-a
+ dc = c-a
+ if (db == 0) or (dc == 0) or (b==c): return None
+ denom = (db*dc)**2 * (db-dc)
+ d1 = np.empty((2,2))
+ d1[0,0] = dc**2
+ d1[0,1] = -db**2
+ d1[1,0] = -dc**3
+ d1[1,1] = db**3
+ [A,B] = np.dot(d1, np.asarray([fb-fa-C*db,fc-fa-C*dc]).flatten())
+ A /= denom
+ B /= denom
+ radical = B*B-3*A*C
+ if radical < 0: return None
+ if (A == 0): return None
+ xmin = a + (-B + np.sqrt(radical))/(3*A)
+ return xmin
+
+
+def _quadmin(a,fa,fpa,b,fb):
+ """
+ Finds the minimizer for a quadratic polynomial that goes through
+ the points (a,fa), (b,fb) with derivative at a of fpa.
+
+ """
+ # f(x) = B*(x-a)^2 + C*(x-a) + D
+ D = fa
+ C = fpa
+ db = b-a*1.0
+ if (db==0): return None
+ B = (fb-D-C*db)/(db*db)
+ if (B <= 0): return None
+ xmin = a - C / (2.0*B)
+ return xmin
+
+def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
+ phi, derphi, phi0, derphi0, c1, c2):
+ """
+ Part of the optimization algorithm in `scalar_search_wolfe2`.
+ """
+
+ maxiter = 10
+ i = 0
+ delta1 = 0.2 # cubic interpolant check
+ delta2 = 0.1 # quadratic interpolant check
+ phi_rec = phi0
+ a_rec = 0
+ while 1:
+ # Interpolate to find a trial step length between a_lo and a_hi.
+ # Use cubic interpolation first; if the result is within
+ # delta1*dalpha of the interval end points, or outside the interval
+ # bounded by a_lo and a_hi, fall back to quadratic interpolation;
+ # if that result is still too close, use bisection.
+
+ dalpha = a_hi-a_lo;
+ if dalpha < 0: a,b = a_hi,a_lo
+ else: a,b = a_lo, a_hi
+
+ # minimizer of cubic interpolant
+ # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
+ #
+ # if the result is too close to the end points (or out of the
+ # interval) then use quadratic interpolation with phi_lo,
+ # derphi_lo and phi_hi; if the result is still too close to the
+ # end points (or out of the interval) then use bisection
+
+ if (i > 0):
+ cchk = delta1*dalpha
+ a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi, a_rec, phi_rec)
+ if (i==0) or (a_j is None) or (a_j > b-cchk) or (a_j < a+cchk):
+ qchk = delta2*dalpha
+ a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
+ if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk):
+ a_j = a_lo + 0.5*dalpha
+
+ # Check new value of a_j
+
+ phi_aj = phi(a_j)
+ if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
+ phi_rec = phi_hi
+ a_rec = a_hi
+ a_hi = a_j
+ phi_hi = phi_aj
+ else:
+ derphi_aj = derphi(a_j)
+ if abs(derphi_aj) <= -c2*derphi0:
+ a_star = a_j
+ val_star = phi_aj
+ valprime_star = derphi_aj
+ break
+ if derphi_aj*(a_hi - a_lo) >= 0:
+ phi_rec = phi_hi
+ a_rec = a_hi
+ a_hi = a_lo
+ phi_hi = phi_lo
+ else:
+ phi_rec = phi_lo
+ a_rec = a_lo
+ a_lo = a_j
+ phi_lo = phi_aj
+ derphi_lo = derphi_aj
+ i += 1
+ if (i > maxiter):
+ a_star = a_j
+ val_star = phi_aj
+ valprime_star = None
+ break
+ return a_star, val_star, valprime_star
+
+
+#------------------------------------------------------------------------------
+# Armijo line and scalar searches
+#------------------------------------------------------------------------------
+
+def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
+ """Minimize over alpha, the function ``f(xk+alpha pk)``.
+
+ Uses the interpolation algorithm (Armijo backtracking) as suggested by
+ Wright and Nocedal in 'Numerical Optimization', 1999, pg. 56-57
+
+ Returns
+ -------
+ alpha : float
+ Step size, or None if no suitable step was found
+ f_count : int
+ Number of function evaluations made
+ f_val_at_alpha : float
+ Value of `f` at the new point
+
+ """
+ xk = np.atleast_1d(xk)
+ fc = [0]
+
+ def phi(alpha1):
+ fc[0] += 1
+ return f(xk + alpha1*pk, *args)
+
+ if old_fval is None:
+ phi0 = phi(0.)
+ else:
+ phi0 = old_fval # f(xk) was already computed by the caller
+
+ derphi0 = np.dot(gfk, pk)
+ alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1, alpha0=alpha0)
+ return alpha, fc[0], phi1
+
+def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
+ """
+ Compatibility wrapper for `line_search_armijo`
+ """
+ r = line_search_armijo(f, xk, pk, gfk, old_fval, args=args, c1=c1,
+ alpha0=alpha0)
+ return r[0], r[1], 0, r[2]
+
+def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0):
+ """Minimize over alpha, the function ``phi(alpha)``.
+
+ Uses the interpolation algorithm (Armijo backtracking) as suggested by
+ Wright and Nocedal in 'Numerical Optimization', 1999, pg. 56-57
+
+ alpha > 0 is assumed to be a descent direction, i.e. `derphi0 < 0`.
+
+ Returns
+ -------
+ alpha
+ phi1
+
+ """
+ phi_a0 = phi(alpha0)
+ if phi_a0 <= phi0 + c1*alpha0*derphi0:
+ return alpha0, phi_a0
+
+ # Otherwise compute the minimizer of a quadratic interpolant:
+
+ alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
+ phi_a1 = phi(alpha1)
+
+ if (phi_a1 <= phi0 + c1*alpha1*derphi0):
+ return alpha1, phi_a1
+
+ # Otherwise loop with cubic interpolation until we find an alpha which
+ # satisfies the first Wolfe condition (since we are backtracking, we will
+ # assume that the value of alpha is not too small and satisfies the second
+ # condition).
+
+ while alpha1 > amin: # we are assuming alpha>0 is a descent direction
+ factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
+ a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
+ alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
+ a = a / factor
+ b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
+ alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
+ b = b / factor
+
+ alpha2 = (-b + np.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a)
+ phi_a2 = phi(alpha2)
+
+ if (phi_a2 <= phi0 + c1*alpha2*derphi0):
+ return alpha2, phi_a2
+
+ if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
+ alpha2 = alpha1 / 2.0
+
+ alpha0 = alpha1
+ alpha1 = alpha2
+ phi_a0 = phi_a1
+ phi_a1 = phi_a2
+
+ # Failed to find a suitable step length
+ return None, phi_a1
+
scipy/optimize/optimize.py (336 lines changed)
@@ -25,7 +25,9 @@
import numpy
from numpy import atleast_1d, eye, mgrid, argmin, zeros, shape, empty, \
squeeze, vectorize, asarray, absolute, sqrt, Inf, asfarray, isinf
-import linesearch
+from linesearch import \
+ line_search_BFGS, line_search_wolfe1, line_search_wolfe2, \
+ line_search_wolfe2 as line_search
# These have been copied from Numeric's MLab.py
# I don't think they made the transition to scipy_core
@@ -294,324 +296,6 @@ def fmin(func, x0, args=(), xtol=1e-4, ftol=1e-4, maxiter=None, maxfun=None,
return retlist
-
-def _cubicmin(a,fa,fpa,b,fb,c,fc):
- # finds the minimizer for a cubic polynomial that goes through the
- # points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa.
- #
- # if no minimizer can be found return None
- #
- # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
-
- C = fpa
- D = fa
- db = b-a
- dc = c-a
- if (db == 0) or (dc == 0) or (b==c): return None
- denom = (db*dc)**2 * (db-dc)
- d1 = empty((2,2))
- d1[0,0] = dc**2
- d1[0,1] = -db**2
- d1[1,0] = -dc**3
- d1[1,1] = db**3
- [A,B] = numpy.dot(d1,asarray([fb-fa-C*db,fc-fa-C*dc]).flatten())
- A /= denom
- B /= denom
- radical = B*B-3*A*C
- if radical < 0: return None
- if (A == 0): return None
- xmin = a + (-B + sqrt(radical))/(3*A)
- return xmin
-
-
-def _quadmin(a,fa,fpa,b,fb):
- # finds the minimizer for a quadratic polynomial that goes through
- # the points (a,fa), (b,fb) with derivative at a of fpa
- # f(x) = B*(x-a)^2 + C*(x-a) + D
- D = fa
- C = fpa
- db = b-a*1.0
- if (db==0): return None
- B = (fb-D-C*db)/(db*db)
- if (B <= 0): return None
- xmin = a - C / (2.0*B)
- return xmin
-
-def zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
- phi, derphi, phi0, derphi0, c1, c2):
- maxiter = 10
- i = 0
- delta1 = 0.2 # cubic interpolant check
- delta2 = 0.1 # quadratic interpolant check
- phi_rec = phi0
- a_rec = 0
- while 1:
- # interpolate to find a trial step length between a_lo and
- # a_hi Need to choose interpolation here. Use cubic
- # interpolation and then if the result is within delta *
- # dalpha or outside of the interval bounded by a_lo or a_hi
- # then use quadratic interpolation, if the result is still too
- # close, then use bisection
-
- dalpha = a_hi-a_lo;
- if dalpha < 0: a,b = a_hi,a_lo
- else: a,b = a_lo, a_hi
-
- # minimizer of cubic interpolant
- # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
- # if the result is too close to the end points (or out of
- # the interval) then use quadratic interpolation with
- # phi_lo, derphi_lo and phi_hi
- # if the result is stil too close to the end points (or
- # out of the interval) then use bisection
-
- if (i > 0):
- cchk = delta1*dalpha
- a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi, a_rec, phi_rec)
- if (i==0) or (a_j is None) or (a_j > b-cchk) or (a_j < a+cchk):
- qchk = delta2*dalpha
- a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
- if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk):
- a_j = a_lo + 0.5*dalpha
-# print "Using bisection."
-# else: print "Using quadratic."
-# else: print "Using cubic."
-
- # Check new value of a_j
-
- phi_aj = phi(a_j)
- if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
- phi_rec = phi_hi
- a_rec = a_hi
- a_hi = a_j
- phi_hi = phi_aj
- else:
- derphi_aj = derphi(a_j)
- if abs(derphi_aj) <= -c2*derphi0:
- a_star = a_j
- val_star = phi_aj
- valprime_star = derphi_aj
- break
- if derphi_aj*(a_hi - a_lo) >= 0:
- phi_rec = phi_hi
- a_rec = a_hi
- a_hi = a_lo
- phi_hi = phi_lo
- else:
- phi_rec = phi_lo
- a_rec = a_lo
- a_lo = a_j
- phi_lo = phi_aj
- derphi_lo = derphi_aj
- i += 1
- if (i > maxiter):
- a_star = a_j
- val_star = phi_aj
- valprime_star = None
- break
- return a_star, val_star, valprime_star
-
-def line_search(f, myfprime, xk, pk, gfk, old_fval, old_old_fval,
- args=(), c1=1e-4, c2=0.9, amax=50):
- """Find alpha that satisfies strong Wolfe conditions.
-
- Parameters
- ----------
- f : callable f(x,*args)
- Objective function.
- myfprime : callable f'(x,*args)
- Objective function gradient (can be None).
- xk : ndarray
- Starting point.
- pk : ndarray
- Search direction.
- gfk : ndarray
- Gradient value for x=xk (xk being the current parameter
- estimate).
- args : tuple
- Additional arguments passed to objective function.
- c1 : float
- Parameter for Armijo condition rule.
- c2 : float
- Parameter for curvature condition rule.
-
- Returns
- -------
- alpha0 : float
- Alpha for which ``x_new = x0 + alpha * pk``.
- fc : int
- Number of function evaluations made.
- gc : int
- Number of gradient evaluations made.
-
- Notes
- -----
- Uses the line search algorithm to enforce strong Wolfe
- conditions. See Wright and Nocedal, 'Numerical Optimization',
- 1999, pg. 59-60.
-
- For the zoom phase it uses an algorithm by [...].
-
- """
-
- global _ls_fc, _ls_gc, _ls_ingfk
- _ls_fc = 0
- _ls_gc = 0
- _ls_ingfk = None
- def phi(alpha):
- global _ls_fc
- _ls_fc += 1
- return f(xk+alpha*pk,*args)
-
- if isinstance(myfprime,type(())):
- def phiprime(alpha):
- global _ls_fc, _ls_ingfk
- _ls_fc += len(xk)+1
- eps = myfprime[1]
- fprime = myfprime[0]
- newargs = (f,eps) + args
- _ls_ingfk = fprime(xk+alpha*pk,*newargs) # store for later use
- return numpy.dot(_ls_ingfk,pk)
- else:
- fprime = myfprime
- def phiprime(alpha):
- global _ls_gc, _ls_ingfk
- _ls_gc += 1
- _ls_ingfk = fprime(xk+alpha*pk,*args) # store for later use
- return numpy.dot(_ls_ingfk,pk)
-
- alpha0 = 0
- phi0 = old_fval
- derphi0 = numpy.dot(gfk,pk)
-
- alpha1 = pymin(1.0,1.01*2*(phi0-old_old_fval)/derphi0)
-
- if alpha1 == 0:
- # This shouldn't happen. Perhaps the increment has slipped below
- # machine precision? For now, set the return variables skip the
- # useless while loop, and raise warnflag=2 due to possible imprecision.
- alpha_star = None
- fval_star = old_fval
- old_fval = old_old_fval
- fprime_star = None
-
- phi_a1 = phi(alpha1)
- #derphi_a1 = phiprime(alpha1) evaluated below
-
- phi_a0 = phi0
- derphi_a0 = derphi0
-
- i = 1
- maxiter = 10
- while 1: # bracketing phase
- if alpha1 == 0:
- break
- if (phi_a1 > phi0 + c1*alpha1*derphi0) or \
- ((phi_a1 >= phi_a0) and (i > 1)):
- alpha_star, fval_star, fprime_star = \
- zoom(alpha0, alpha1, phi_a0,
- phi_a1, derphi_a0, phi, phiprime,
- phi0, derphi0, c1, c2)
- break
-
- derphi_a1 = phiprime(alpha1)
- if (abs(derphi_a1) <= -c2*derphi0):
- alpha_star = alpha1
- fval_star = phi_a1
- fprime_star = derphi_a1
- break
-
- if (derphi_a1 >= 0):
- alpha_star, fval_star, fprime_star = \
- zoom(alpha1, alpha0, phi_a1,
- phi_a0, derphi_a1, phi, phiprime,
- phi0, derphi0, c1, c2)
- break
-
- alpha2 = 2 * alpha1 # increase by factor of two on each iteration
- i = i + 1
- alpha0 = alpha1
- alpha1 = alpha2
- phi_a0 = phi_a1
- phi_a1 = phi(alpha1)
- derphi_a0 = derphi_a1
-
- # stopping test if lower function not found
- if (i > maxiter):
- alpha_star = alpha1
- fval_star = phi_a1
- fprime_star = None
- break
-
- if fprime_star is not None:
- # fprime_star is a number (derphi) -- so use the most recently
- # calculated gradient used in computing it derphi = gfk*pk
- # this is the gradient at the next step no need to compute it
- # again in the outer loop.
- fprime_star = _ls_ingfk
-
- return alpha_star, _ls_fc, _ls_gc, fval_star, old_fval, fprime_star
-
-
-def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
- """Minimize over alpha, the function ``f(xk+alpha pk)``.
-
- Uses the interpolation algorithm (Armiijo backtracking) as suggested by
- Wright and Nocedal in 'Numerical Optimization', 1999, pg. 56-57
-
- :Returns: (alpha, fc, gc)
-
- """
-
- xk = atleast_1d(xk)
- fc = 0
- phi0 = old_fval # compute f(xk) -- done in past loop
- phi_a0 = f(*((xk+alpha0*pk,)+args))
- fc = fc + 1
- derphi0 = numpy.dot(gfk,pk)
-
- if (phi_a0 <= phi0 + c1*alpha0*derphi0):
- return alpha0, fc, 0, phi_a0
-
- # Otherwise compute the minimizer of a quadratic interpolant:
-
- alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
- phi_a1 = f(*((xk+alpha1*pk,)+args))
- fc = fc + 1
-
- if (phi_a1 <= phi0 + c1*alpha1*derphi0):
- return alpha1, fc, 0, phi_a1
-
- # Otherwise loop with cubic interpolation until we find an alpha which
- # satifies the first Wolfe condition (since we are backtracking, we will
- # assume that the value of alpha is not too small and satisfies the second
- # condition.
-
- while 1: # we are assuming pk is a descent direction
- factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
- a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
- alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
- a = a / factor
- b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
- alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
- b = b / factor
-
- alpha2 = (-b + numpy.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a)
- phi_a2 = f(*((xk+alpha2*pk,)+args))
- fc = fc + 1
-
- if (phi_a2 <= phi0 + c1*alpha2*derphi0):
- return alpha2, fc, 0, phi_a2
-
- if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
- alpha2 = alpha1 / 2.0
-
- alpha0 = alpha1
- alpha1 = alpha2
- phi_a0 = phi_a1
- phi_a1 = phi_a2
-
-
def approx_fprime(xk,f,epsilon,*args):
f0 = f(*((xk,)+args))
grad = numpy.zeros((len(xk),), float)
@@ -723,12 +407,12 @@ def fmin_bfgs(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf,
while (gnorm > gtol) and (k < maxiter):
pk = -numpy.dot(Hk,gfk)
alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
- linesearch.line_search(f,myfprime,xk,pk,gfk,
- old_fval,old_old_fval)
+ line_search_wolfe1(f,myfprime,xk,pk,gfk,
+ old_fval,old_old_fval)
if alpha_k is None: # line search failed try different one.
alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
- line_search(f,myfprime,xk,pk,gfk,
- old_fval,old_old_fval)
+ line_search_wolfe2(f,myfprime,xk,pk,gfk,
+ old_fval,old_old_fval)
if alpha_k is None:
# This line search also failed to find a better solution.
warnflag = 2
@@ -894,12 +578,12 @@ def fmin_cg(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf, epsilon=_epsilon,
old_old_fval_backup = old_old_fval
alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
- linesearch.line_search(f,myfprime,xk,pk,gfk,old_fval,
+ line_search_wolfe1(f,myfprime,xk,pk,gfk,old_fval,
old_old_fval,c2=0.4)
if alpha_k is None: # line search failed -- use different one.
alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
- line_search(f,myfprime,xk,pk,gfk,
- old_fval_backup,old_old_fval_backup)
+ line_search_wolfe2(f,myfprime,xk,pk,gfk,
+ old_fval_backup,old_old_fval_backup)
if alpha_k is None or alpha_k == 0:
# This line search also failed to find a better solution.
warnflag = 2
scipy/optimize/tests/test_linesearch.py (234 lines changed)
@@ -0,0 +1,234 @@
+"""
+Tests for line search routines
+"""
+
+from numpy.testing import *
+import scipy.optimize.linesearch as ls
+import numpy as np
+
+def assert_wolfe(s, phi, derphi, c1=1e-4, c2=0.9, err_msg=""):
+ """
+ Check that strong Wolfe conditions apply
+ """
+ phi1 = phi(s)
+ phi0 = phi(0)
+ derphi0 = derphi(0)
+ derphi1 = derphi(s)
+ msg = "s = %s; phi(0) = %s; phi(s) = %s; phi'(0) = %s; phi'(s) = %s; %s" % (
+ s, phi0, phi1, derphi0, derphi1, err_msg)
+
+ assert_(phi1 <= phi0 + c1*s*derphi0, "Wolfe 1 failed: "+ msg)
+ assert_(abs(derphi1) <= abs(c2*derphi0), "Wolfe 2 failed: "+ msg)
+
+def assert_armijo(s, phi, c1=1e-4, err_msg=""):
+ """
+ Check that Armijo condition applies
+ """
+ phi1 = phi(s)
+ phi0 = phi(0)
+ msg = "s = %s; phi(0) = %s; phi(s) = %s; %s" % (s, phi0, phi1, err_msg)
+ assert_(phi1 <= (1 - c1*s)*phi0, msg)
+
+def assert_line_wolfe(x, p, s, f, fprime, **kw):
+ assert_wolfe(s, phi=lambda sp: f(x + p*sp),
+ derphi=lambda sp: np.dot(fprime(x + p*sp), p), **kw)
+
+def assert_line_armijo(x, p, s, f, **kw):
+ assert_armijo(s, phi=lambda sp: f(x + p*sp), **kw)
+
+
+class TestLineSearch(object):
+ # -- scalar functions; must have dphi(0.) < 0
+ def _scalar_func_1(self, s):
+ self.fcount += 1
+ p = -s - s**3 + s**4
+ dp = -1 - 3*s**2 + 4*s**3
+ return p, dp
+
+ def _scalar_func_2(self, s):
+ self.fcount += 1
+ p = np.exp(-4*s) + s**2
+ dp = -4*np.exp(-4*s) + 2*s
+ return p, dp
+
+ def _scalar_func_3(self, s):
+ self.fcount += 1
+ p = -np.sin(10*s)
+ dp = -10*np.cos(10*s)
+ return p, dp
+
+ # -- n-d functions
+
+ def _line_func_1(self, x):
+ self.fcount += 1
+ f = np.dot(x, x)
+ df = 2*x
+ return f, df
+
+ def _line_func_2(self, x):
+ self.fcount += 1
+ f = np.dot(x, np.dot(self.A, x)) + 1
+ df = np.dot(self.A + self.A.T, x)
+ return f, df
+
+ # --
+
+ def __init__(self):
+ self.scalar_funcs = []
+ self.line_funcs = []
+ self.N = 20
+ self.fcount = 0
+
+ def bind_index(func, idx):
+ # Remember Python's closure semantics!
+ return lambda *a, **kw: func(*a, **kw)[idx]
+
+ for name in sorted(dir(self)):
+ if name.startswith('_scalar_func_'):
+ value = getattr(self, name)
+ self.scalar_funcs.append(
+ (name, bind_index(value, 0), bind_index(value, 1)))
+ elif name.startswith('_line_func_'):
+ value = getattr(self, name)
+ self.line_funcs.append(
+ (name, bind_index(value, 0), bind_index(value, 1)))
+
+ def setUp(self):
+ np.random.seed(1234)
+ self.A = np.random.randn(self.N, self.N)
+
+ def scalar_iter(self):
+ for name, phi, derphi in self.scalar_funcs:
+ for old_phi0 in np.random.randn(3):
+ yield name, phi, derphi, old_phi0
+
+ def line_iter(self):
+ for name, f, fprime in self.line_funcs:
+ k = 0
+ while k < 9:
+ x = np.random.randn(self.N)
+ p = np.random.randn(self.N)
+ if np.dot(p, fprime(x)) >= 0:
+ # always pick a descent direction
+ continue
+ k += 1
+ old_fv = float(np.random.randn())
+ yield name, f, fprime, x, p, old_fv
+
+ # -- Generic scalar searches
+
+ def test_scalar_search_wolfe1(self):
+ c = 0
+ for name, phi, derphi, old_phi0 in self.scalar_iter():
+ c += 1
+ s, phi1, phi0 = ls.scalar_search_wolfe1(phi, derphi, phi(0),
+ old_phi0, derphi(0))
+ assert_equal(phi0, phi(0))
+ assert_equal(phi1, phi(s))
+ assert_wolfe(s, phi, derphi)
+
+ assert c > 3 # check that the iterator really works...
+
+ def test_scalar_search_wolfe2(self):
+ for name, phi, derphi, old_phi0 in self.scalar_iter():
+ s, phi1, phi0, derphi1 = ls.scalar_search_wolfe2(
+ phi, derphi, phi(0), old_phi0, derphi(0))
+ assert_equal(phi0, phi(0), name)
+ assert_equal(phi1, phi(s), name)
+ if derphi1 is not None:
+ assert_equal(derphi1, derphi(s), name)
+ assert_wolfe(s, phi, derphi, err_msg="%s %g" % (name, old_phi0))
+
+ def test_scalar_search_armijo(self):
+ for name, phi, derphi, old_phi0 in self.scalar_iter():
+ s, phi1 = ls.scalar_search_armijo(phi, phi(0), derphi(0))
+ assert_equal(phi1, phi(s), name)
+ assert_armijo(s, phi, err_msg="%s %g" % (name, old_phi0))
+
+ # -- Generic line searches
+
+ def test_line_search_wolfe1(self):
+ c = 0
+ smax = 100
+ for name, f, fprime, x, p, old_f in self.line_iter():
+ f0 = f(x)
+ g0 = fprime(x)
+ self.fcount = 0
+ s, fc, gc, fv, ofv, gv = ls.line_search_wolfe1(f, fprime, x, p,
+ g0, f0, old_f,
+ amax=smax)
+ assert_equal(self.fcount, fc+gc)
+ assert_equal(ofv, f(x))
+ assert_equal(fv, f(x + s*p))
+ assert_equal(gv, fprime(x + s*p))
+ if s < smax:
+ c += 1
+ assert_line_wolfe(x, p, s, f, fprime, err_msg=name)
+
+ assert c > 3 # check that the iterator really works...
+
+ def test_line_search_wolfe2(self):
+ c = 0
+ smax = 100
+ for name, f, fprime, x, p, old_f in self.line_iter():
+ f0 = f(x)
+ g0 = fprime(x)
+ self.fcount = 0
+ s, fc, gc, fv, ofv, gv = ls.line_search_wolfe2(f, fprime, x, p,
+ g0, f0, old_f,
+ amax=smax)
+ assert_equal(self.fcount, fc+gc)
+ assert_equal(ofv, f(x))
+ assert_equal(fv, f(x + s*p))
+ if gv is not None:
+ assert_equal(gv, fprime(x + s*p))
+ if s < smax:
+ c += 1
+ assert_line_wolfe(x, p, s, f, fprime, err_msg=name)
+ assert c > 3 # check that the iterator really works...
+
+ def test_line_search_armijo(self):
+ c = 0
+ for name, f, fprime, x, p, old_f in self.line_iter():
+ f0 = f(x)
+ g0 = fprime(x)
+ self.fcount = 0
+ s, fc, fv = ls.line_search_armijo(f, x, p, g0, f0)
+ c += 1
+ assert_equal(self.fcount, fc)
+ assert_equal(fv, f(x + s*p))
+ assert_line_armijo(x, p, s, f, err_msg=name)
+ assert c >= 9
+
+ # -- More specific tests
+
+ def test_armijo_terminate_1(self):
+ # Armijo should evaluate the function only once if the trial step
+ # is already suitable
+ count = [0]
+ def phi(s):
+ count[0] += 1
+ return -s + 0.01*s**2
+ s, phi1 = ls.scalar_search_armijo(phi, phi(0), -1, alpha0=1)
+ assert_equal(s, 1)
+ assert_equal(count[0], 2)
+ assert_armijo(s, phi)
+
+ def test_wolfe_terminate(self):
+ # wolfe1 and wolfe2 should also evaluate the function only a few
+ # times if the trial step is already suitable
+
+ def phi(s):
+ count[0] += 1
+ return -s + 0.05*s**2
+
+ def derphi(s):
+ count[0] += 1
+ return -1 + 0.05*2*s
+
+ for func in [ls.scalar_search_wolfe1, ls.scalar_search_wolfe2]:
+ count = [0]
+ r = func(phi, derphi, phi(0), None, derphi(0))
+ assert r[0] is not None, (r, func)
+ assert count[0] <= 2 + 2, (count, func)
+ assert_wolfe(r[0], phi, derphi, err_msg=str(func))
scipy/optimize/tests/test_optimize.py (14 lines changed)
@@ -177,18 +177,10 @@ def test_ncg(self):
assert self.gradcalls == 18, self.gradcalls # 0.8.0
#assert self.gradcalls == 22, self.gradcalls # 0.7.0
- # Ensure that the function behaves the same;
-
- # # This is from Scipy 0.7.0
- #assert np.allclose(self.trace[3:5],
- # [[-4.35700753e-07, -5.24869435e-01, 4.87527480e-01],
- # [-4.35700753e-07, -5.24869401e-01, 4.87527774e-01]],
- # atol=1e-14, rtol=1e-7), self.trace[3:5]
-
- # This if from Scipy 0.8.0; with some fixes in ncg
+ # Ensure that the function behaves the same; this is from Scipy 0.7.0
assert np.allclose(self.trace[3:5],
- [[-2.90334653e-07,-5.24869431e-01,4.87527470e-01],
- [-2.90334653e-07,-5.24869375e-01,4.87527680e-01]],
+ [[-4.35700753e-07, -5.24869435e-01, 4.87527480e-01],
+ [-4.35700753e-07, -5.24869401e-01, 4.87527774e-01]],
atol=1e-14, rtol=1e-7), self.trace[3:5]