Skip to content

Commit

Permalink
ENH: Add initial estimate of inverse hessian to BFGS
Browse files Browse the repository at this point in the history
  • Loading branch information
adler-j committed Oct 5, 2016
1 parent 5b22e42 commit ff744bc
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 8 deletions.
17 changes: 12 additions & 5 deletions examples/solvers/lbfgs_tomography.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
# Discrete reconstruction space: discretized functions on the rectangle
# [-20, 20]^2 with 300 samples per dimension.
reco_space = odl.uniform_discr(
min_pt=[-20, -20], max_pt=[20, 20], shape=[200, 200], dtype='float32')
min_pt=[-20, -20], max_pt=[20, 20], shape=[200, 200], dtype='float64')

# Make a parallel beam geometry with flat detector
# Angles: uniformly spaced, n = 360, min = 0, max = 2 * pi
Expand Down Expand Up @@ -64,20 +64,27 @@

# Create sinogram of forward projected phantom with noise
data = ray_trafo(discr_phantom)
# data += odl.phantom.white_noise(ray_trafo.range) * np.mean(data) * 0.1
data += odl.phantom.white_noise(ray_trafo.range) * np.mean(data) * 0.1

# --- Set up optimization problem and solve --- #

# Create objective functional
obj_fun = odl.solvers.L2NormSquared(ray_trafo.range) * (ray_trafo - data)

# Create line search
line_search = odl.solvers.BacktrackingLineSearch(obj_fun)
line_search = 1.0
# line_search = odl.solvers.BacktrackingLineSearch(obj_fun)

# Create initial estimate of the inverse hessian by a diagonal estimate
opnorm = odl.power_method_opnorm(ray_trafo)
hessinv_estimate = odl.ScalingOperator(reco_space, 1 / opnorm**2)

# Optionally pass callback to the solver to display intermediate results
callback = (odl.solvers.CallbackPrintIteration() &
odl.solvers.CallbackShow())

# Pick parameters
maxiter = 30
maxiter = 20
maxcor = 5 # only save some vectors (Limited memory)

# Choose a starting point
Expand All @@ -86,7 +93,7 @@
# Run the algorithm
odl.solvers.bfgs_method(
obj_fun, x, line_search=line_search, maxiter=maxiter, maxcor=maxcor,
callback=callback)
hessinv_estimate=hessinv_estimate, callback=callback)

# Display images
discr_phantom.show(title='original image')
Expand Down
17 changes: 14 additions & 3 deletions odl/solvers/smooth/newton.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
# TODO: update all docs


def _bfgs_multiply(s, y, x):
def _bfgs_multiply(s, y, x, hessinv_estimate=None):
"""Computes ``Hn^-1(x)`` for the L-BFGS method.
Parameters
Expand All @@ -45,6 +45,8 @@ def _bfgs_multiply(s, y, x):
The ``y`` coefficients in the BFGS update, see Notes.
x : `LinearSpaceElement`
Point in which to evaluate the product.
hessinv_estimate : `Operator`, optional
Initial estimate of the hessian ``H0^-1``.
Notes
-----
Expand All @@ -56,6 +58,8 @@ def _bfgs_multiply(s, y, x):
H_{n}^{-1}
\\left(I - \\frac{ y_n s_n^T}{y_n^T s_n} \\right) +
\\frac{s_n s_n^T}{y_n^T \, s_n}
With :math:`H_0^{-1}` given by ``hess_estimate``.
"""
assert len(s) == len(y)

Expand All @@ -68,6 +72,9 @@ def _bfgs_multiply(s, y, x):
alphas[i] = rhos[i] * (s[i].inner(r))
r.lincomb(1, r, -alphas[i], y[i])

if hessinv_estimate is not None:
r = hessinv_estimate(r)

for i in range(len(s)):
beta = rhos[i] * (y[i].inner(r))
r.lincomb(1, r, alphas[i] - beta, s[i])
Expand Down Expand Up @@ -178,7 +185,7 @@ def newtons_method(f, x, line_search=1.0, maxiter=1000, tol=1e-16,


def bfgs_method(f, x, line_search=1.0, maxiter=1000, tol=1e-16, maxcor=None,
callback=None):
hessinv_estimate=None, callback=None):
"""Quasi-Newton BFGS method to minimize a differentiable function.
Can use either the regular BFGS method, or the limited memory BFGS method.
Expand Down Expand Up @@ -225,6 +232,10 @@ def bfgs_method(f, x, line_search=1.0, maxiter=1000, tol=1e-16, maxcor=None,
Maximum number of correction factors to store. If ``None``, the method
is the regular BFGS method. If an integer, the method becomes the
Limited Memory BFGS method.
hessinv_estimate : `Operator`, optional
Initial estimate of the inverse of the Hessian operator. Needs to be an
operator from ``f.domain`` to ``f.domain``.
Default: Identity on ``f.domain``
callback : `callable`, optional
Object executing code per iteration, e.g. plotting each iterate.
"""
Expand All @@ -242,7 +253,7 @@ def bfgs_method(f, x, line_search=1.0, maxiter=1000, tol=1e-16, maxcor=None,
grad_x = grad(x)
for _ in range(maxiter):
# Determine a stepsize using line search
search_dir = -_bfgs_multiply(ys, ss, grad_x)
search_dir = -_bfgs_multiply(ys, ss, grad_x, hessinv_estimate)
dir_deriv = search_dir.inner(grad_x)
if np.abs(dir_deriv) < tol:
return # we found an optimum
Expand Down

0 comments on commit ff744bc

Please sign in to comment.