ENH: Add initial estimate of inverse hessian to BFGS

odlgroup · Oct 5, 2016 · ff744bc · ff744bc
1 parent 5b22e42
commit ff744bc
Show file tree

Hide file tree

Showing 2 changed files with 26 additions and 8 deletions.
diff --git a/examples/solvers/lbfgs_tomography.py b/examples/solvers/lbfgs_tomography.py
@@ -35,7 +35,7 @@
 # Discrete reconstruction space: discretized functions on the rectangle
 # [-20, 20]^2 with 300 samples per dimension.
 reco_space = odl.uniform_discr(
-    min_pt=[-20, -20], max_pt=[20, 20], shape=[200, 200], dtype='float32')
+    min_pt=[-20, -20], max_pt=[20, 20], shape=[200, 200], dtype='float64')
 
 # Make a parallel beam geometry with flat detector
 # Angles: uniformly spaced, n = 360, min = 0, max = 2 * pi
@@ -64,20 +64,27 @@
 
 # Create sinogram of forward projected phantom with noise
 data = ray_trafo(discr_phantom)
-# data += odl.phantom.white_noise(ray_trafo.range) * np.mean(data) * 0.1
+data += odl.phantom.white_noise(ray_trafo.range) * np.mean(data) * 0.1
+
+# --- Set up optimization problem and solve --- #
 
 # Create objective functional
 obj_fun = odl.solvers.L2NormSquared(ray_trafo.range) * (ray_trafo - data)
 
 # Create line search
-line_search = odl.solvers.BacktrackingLineSearch(obj_fun)
+line_search = 1.0
+# line_search = odl.solvers.BacktrackingLineSearch(obj_fun)
+
+# Create initial estimate of the inverse hessian by a diagonal estimate
+opnorm = odl.power_method_opnorm(ray_trafo)
+hessinv_estimate = odl.ScalingOperator(reco_space, 1 / opnorm**2)
 
 # Optionally pass callback to the solver to display intermediate results
 callback = (odl.solvers.CallbackPrintIteration() &
             odl.solvers.CallbackShow())
 
 # Pick parameters
-maxiter = 30
+maxiter = 20
 maxcor = 5  # only save some vectors (Limited memory)
 
 # Choose a starting point
@@ -86,7 +93,7 @@
 # Run the algorithm
 odl.solvers.bfgs_method(
     obj_fun, x, line_search=line_search, maxiter=maxiter, maxcor=maxcor,
-    callback=callback)
+    hessinv_estimate=hessinv_estimate, callback=callback)
 
 # Display images
 discr_phantom.show(title='original image')

diff --git a/odl/solvers/smooth/newton.py b/odl/solvers/smooth/newton.py
@@ -34,7 +34,7 @@
 # TODO: update all docs
 
 
-def _bfgs_multiply(s, y, x):
+def _bfgs_multiply(s, y, x, hessinv_estimate=None):
     """Computes ``Hn^-1(x)`` for the L-BFGS method.
 
     Parameters
@@ -45,6 +45,8 @@ def _bfgs_multiply(s, y, x):
         The ``y`` coefficients in the BFGS update, see Notes.
     x : `LinearSpaceElement`
         Point in which to evaluate the product.
+    hessinv_estimate : `Operator`, optional
+        Initial estimate of the hessian ``H0^-1``.
 
     Notes
     -----
@@ -56,6 +58,8 @@ def _bfgs_multiply(s, y, x):
         H_{n}^{-1}
         \\left(I - \\frac{ y_n s_n^T}{y_n^T s_n} \\right) +
         \\frac{s_n s_n^T}{y_n^T \, s_n}
+
+    With :math:`H_0^{-1}` given by ``hess_estimate``.
     """
     assert len(s) == len(y)
 
@@ -68,6 +72,9 @@ def _bfgs_multiply(s, y, x):
         alphas[i] = rhos[i] * (s[i].inner(r))
         r.lincomb(1, r, -alphas[i], y[i])
 
+    if hessinv_estimate is not None:
+        r = hessinv_estimate(r)
+
     for i in range(len(s)):
         beta = rhos[i] * (y[i].inner(r))
         r.lincomb(1, r, alphas[i] - beta, s[i])
@@ -178,7 +185,7 @@ def newtons_method(f, x, line_search=1.0, maxiter=1000, tol=1e-16,
 
 
 def bfgs_method(f, x, line_search=1.0, maxiter=1000, tol=1e-16, maxcor=None,
-                callback=None):
+                hessinv_estimate=None, callback=None):
     """Quasi-Newton BFGS method to minimize a differentiable function.
 
     Can use either the regular BFGS method, or the limited memory BFGS method.
@@ -225,6 +232,10 @@ def bfgs_method(f, x, line_search=1.0, maxiter=1000, tol=1e-16, maxcor=None,
         Maximum number of correction factors to store. If ``None``, the method
         is the regular BFGS method. If an integer, the method becomes the
         Limited Memory BFGS method.
+    hessinv_estimate : `Operator`, optional
+        Initial estimate of the inverse of the Hessian operator. Needs to be an
+        operator from ``f.domain`` to ``f.domain``.
+        Default: Identity on ``f.domain``
     callback : `callable`, optional
         Object executing code per iteration, e.g. plotting each iterate.
     """
@@ -242,7 +253,7 @@ def bfgs_method(f, x, line_search=1.0, maxiter=1000, tol=1e-16, maxcor=None,
     grad_x = grad(x)
     for _ in range(maxiter):
         # Determine a stepsize using line search
-        search_dir = -_bfgs_multiply(ys, ss, grad_x)
+        search_dir = -_bfgs_multiply(ys, ss, grad_x, hessinv_estimate)
         dir_deriv = search_dir.inner(grad_x)
         if np.abs(dir_deriv) < tol:
             return  # we found an optimum