(Not so) minor optimizations #1012

Merged · 13 commits · Jun 6, 2017
1 change: 0 additions & 1 deletion odl/discr/discretization.py
@@ -535,7 +535,6 @@ class DiscretizedSpaceElement(DiscretizedSetElement, FnBaseVector):

    def __init__(self, space, data):
        """Initialize a new instance."""
-        assert isinstance(space, DiscretizedSpace)
Review comment (Member) on the removed line:
Fully agree, I think I removed all of these in the #861 PR.

        DiscretizedSetElement.__init__(self, space, data)

    def __ipow__(self, p):
43 changes: 33 additions & 10 deletions odl/discr/grid.py
@@ -185,6 +185,9 @@ def __init__(self, *coord_vectors):
                raise ValueError('vector {} contains duplicates'
                                 ''.format(i + 1))

        # Strides are computed lazily on first access and then cached
        self.__stride = None

        self.__coord_vectors = vecs

        # Non-degenerate axes
@@ -381,24 +384,44 @@ def stride(self):
        If the grid contains axes that are not uniform, ``stride`` has
        a ``NaN`` entry.

        For degenerate (length 1) axes, ``stride`` has value ``0.0``.

        Returns
        -------
        stride : numpy.array
            Array of dtype ``float`` and length `ndim`.

        Examples
        --------
        >>> rg = uniform_grid([-1.5, -1], [-0.5, 3], (2, 3))
        >>> rg.stride
        array([ 1.,  2.])

        NaN returned for non-uniform dimension:

        >>> g = RectGrid([0, 1, 2], [0, 1, 4])
        >>> g.stride
        array([  1.,  nan])

        0.0 returned for degenerate dimension:

        >>> g = RectGrid([0, 1, 2], [0])
        >>> g.stride
        array([ 1.,  0.])
        """
-        strd = []
-        for i in range(self.ndim):
-            if not self.is_uniform_byaxis[i]:
-                strd.append(float('nan'))
-            elif self.nondegen_byaxis[i]:
-                strd.append(self.extent[i] / (self.shape[i] - 1.0))
-            else:
-                strd.append(0.0)
-        return np.array(strd)
+        # Cache the result for efficiency instead of re-computing it
+        if self.__stride is None:
+            strd = []
+            for i in range(self.ndim):
+                if not self.is_uniform_byaxis[i]:
+                    strd.append(float('nan'))
+                elif self.nondegen_byaxis[i]:
+                    strd.append(self.extent[i] / (self.shape[i] - 1.0))
+                else:
+                    strd.append(0.0)
+            self.__stride = np.array(strd)
+
+        return self.__stride.copy()

    @property
    def extent(self):
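
A quick sketch of the caching behavior added above: the first ``stride``
access computes and stores the array, and since the property returns a copy,
callers can mutate the result without corrupting the cache (using the
``uniform_grid`` factory from the docstring example)::

    import odl

    g = odl.uniform_grid([-1.5, -1], [-0.5, 3], (2, 3))

    s = g.stride               # first access: computed and cached
    s[0] = -1.0                # mutates only the returned copy
    assert g.stride[0] == 1.0  # cached value is unaffected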
2 changes: 2 additions & 0 deletions odl/operator/operator.py
@@ -20,6 +20,7 @@
import sys

from odl.set import LinearSpace, LinearSpaceElement, Set, Field
from odl.util import cache_arguments


__all__ = ('Operator', 'OperatorComp', 'OperatorSum', 'OperatorVectorSum',
@@ -123,6 +124,7 @@ def _signature_from_spec(func):
    return '{}({})'.format(func.__name__, argstr)


@cache_arguments
def _dispatch_call_args(cls=None, bound_call=None, unbound_call=None,
                        attr='_call'):
    """Check the arguments of ``_call()`` or similar for conformity.
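
``cache_arguments`` is imported from ``odl.util``, but its implementation is
not part of this diff. A minimal sketch of such a decorator (the actual ODL
version may differ) is plain memoization on hashable arguments::

    import functools

    def cache_arguments(func):
        """Cache results of ``func`` per argument combination (sketch)."""
        cache = {}

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            key = (args, tuple(sorted(kwargs.items())))
            if key not in cache:
                cache[key] = func(*args, **kwargs)
            return cache[key]

        return wrapper

Applied to ``_dispatch_call_args``, the signature inspection then runs only
once per distinct argument combination instead of on every call.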
3 changes: 2 additions & 1 deletion odl/operator/pspace_ops.py
@@ -887,7 +887,8 @@ def _call(self, x, out=None):
            return self.prod_op(x)[0]
        else:
            wrapped_out = self.prod_op.range.element([out], cast=False)
-            return self.prod_op(x, out=wrapped_out)
+            result = self.prod_op(x, out=wrapped_out)
+            return result[0]

    def derivative(self, x):
        """Derivative of the reduction operator.
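
The fix above concerns the ``out`` path: ``prod_op`` maps into a one-component
product space, so calling it with ``out=wrapped_out`` returns the
product-space element, whereas this operator must return the single component,
as the ``out is None`` branch already does. A small sketch of that wrapping
relationship, using ``BroadcastOperator`` as a stand-in (hypothetical
example)::

    import odl

    space = odl.rn(3)
    prod_op = odl.BroadcastOperator(odl.IdentityOperator(space))

    x = space.one()
    wrapped = prod_op(x)        # element of ProductSpace(space, 1)
    assert wrapped[0] in space  # component 0 is what ``_call`` should return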
52 changes: 41 additions & 11 deletions odl/solvers/nonsmooth/douglas_rachford.py
@@ -160,32 +160,62 @@ def douglas_rachford_pd(x, f, g, L, tau, sigma, niter,
    w1 = x.space.zero()
    w2 = [Li.range.zero() for Li in L]

+    # Temporaries (not in original article)
+    tmp_domain = x.space.zero()
+
    for k in range(niter):
-        tmp_1 = sum(Li.adjoint(vi) for Li, vi in zip(L, v))
-        tmp_1.lincomb(1, x, -tau / 2, tmp_1)
-        f.proximal(tau)(tmp_1, out=p1)
+        lam_k = lam(k)
+
+        if len(L) > 0:
+            # Compute tmp_domain = sum(Li.adjoint(vi) for Li, vi in zip(L, v))
+            L[0].adjoint(v[0], out=tmp_domain)
+            for Li, vi in zip(L[1:], v[1:]):
+                Li.adjoint(vi, out=p1)
+                tmp_domain += p1
+
+            tmp_domain.lincomb(1, x, -tau / 2, tmp_domain)
+        else:
+            tmp_domain.set_zero()
+
+        f.proximal(tau)(tmp_domain, out=p1)
        w1.lincomb(2, p1, -1, x)

        for i in range(m):
-            prox_cc_g[i](sigma[i])(v[i] + (sigma[i] / 2.0) * L[i](w1),
-                                   out=p2[i])
+            tmp = v[i] + (sigma[i] / 2.0) * L[i](w1)
+            prox_cc_g[i](sigma[i])(tmp, out=p2[i])
            w2[i].lincomb(2.0, p2[i], -1, v[i])

-        tmp_2 = sum(Li.adjoint(wi) for Li, wi in zip(L, w2))
-        z1.lincomb(1.0, w1, - (tau / 2.0), tmp_2)
-        x += lam(k) * (z1 - p1)
+        if len(L) > 0:
+            # Compute:
+            # tmp_domain = sum(Li.adjoint(w2i) for Li, w2i in zip(L, w2))
+            L[0].adjoint(w2[0], out=tmp_domain)
+            for Li, w2i in zip(L[1:], w2[1:]):
+                Li.adjoint(w2i, out=z1)
+                tmp_domain += z1
+        else:
+            tmp_domain.set_zero()
+
+        z1.lincomb(1.0, w1, - (tau / 2.0), tmp_domain)
+
+        # Compute x += lam(k) * (z1 - p1)
+        x.lincomb(1, x, lam_k, z1)
+        x.lincomb(1, x, -lam_k, p1)

+        tmp_domain.lincomb(2, z1, -1, w1)
        for i in range(m):
-            tmp = w2[i] + (sigma[i] / 2.0) * L[i](2.0 * z1 - w1)
            if l is not None:
                # In this case the infimal convolution is used.
+                tmp = w2[i] + (sigma[i] / 2.0) * L[i](tmp_domain)
                prox_cc_l[i](sigma[i])(tmp, out=z2[i])
            else:
                # If the infimal convolution is not given, prox_cc_l is the
                # identity and hence omitted. For more details, see the
                # documentation.
-                z2[i].assign(tmp)
-            v[i] += lam(k) * (z2[i] - p2[i])
+                z2[i].lincomb(1, w2[i], sigma[i] / 2.0, L[i](tmp_domain))
+
+            # Compute v[i] += lam(k) * (z2[i] - p2[i])
+            v[i].lincomb(1, v[i], lam_k, z2[i])
+            v[i].lincomb(1, v[i], -lam_k, p2[i])

        if callback is not None:
            callback(p1)
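
The recurring pattern in this rewrite: expressions like
``sum(Li.adjoint(vi) ...)`` and ``x += lam(k) * (z1 - p1)`` allocate a fresh
space element per arithmetic operation, while ``out``-parameter calls and
``lincomb`` updates write into preallocated temporaries. A standalone sketch
of the two styles, using stock ODL operators as stand-ins for ``L``::

    import odl

    space = odl.rn(4)
    ops = [odl.IdentityOperator(space), odl.ScalingOperator(space, 2.0)]
    vecs = [space.one(), space.one()]

    # Allocating style: every adjoint call and the sum create new elements
    result = sum(op.adjoint(v) for op, v in zip(ops, vecs))

    # In-place style: two preallocated temporaries, reused across terms
    acc = space.element()
    tmp = space.element()
    ops[0].adjoint(vecs[0], out=acc)
    for op, v in zip(ops[1:], vecs[1:]):
        op.adjoint(v, out=tmp)
        acc += tmp

    assert result == acc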
3 changes: 3 additions & 0 deletions odl/solvers/nonsmooth/proximal_operators.py
@@ -28,6 +28,7 @@
                          ConstantOperator, DiagonalOperator)
from odl.space import ProductSpace
from odl.set import LinearSpaceElement
from odl.util import cache_arguments


__all__ = ('combine_proximals', 'proximal_cconj', 'proximal_translation',
@@ -224,6 +225,7 @@ def proximal_arg_scaling(prox_factory, scaling):
    if scaling == 0:
        return proximal_const_func(prox_factory(1.0).domain)

    @cache_arguments
    def arg_scaling_prox_factory(sigma):
        """Create proximal for the argument scaling with a given sigma.

@@ -295,6 +297,7 @@ def proximal_quadratic_perturbation(prox_factory, a, u=None):
        raise TypeError('`u` must be `None` or a `LinearSpaceElement` '
                        'instance, got {!r}.'.format(u))

    @cache_arguments
    def quadratic_perturbation_prox_factory(sigma):
        """Create proximal for the quadratic perturbation with a given sigma.

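
With ``@cache_arguments`` on the inner factories, repeated calls with the same
step size return the same proximal operator instead of reconstructing it;
solvers such as ``douglas_rachford_pd`` call the factory in every iteration
with an unchanged ``sigma``, so construction now happens once. A usage sketch,
assuming the ``proximal_l1`` factory::

    import odl

    space = odl.rn(3)
    prox_factory = odl.solvers.proximal_l1(space)
    scaled = odl.solvers.proximal_arg_scaling(prox_factory, 2.0)

    p1 = scaled(0.5)  # builds the proximal operator for sigma = 0.5
    p2 = scaled(0.5)  # served from the cache: the same object
    assert p1 is p2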
84 changes: 46 additions & 38 deletions odl/space/npy_ntuples.py
@@ -491,81 +491,89 @@ def _blas_is_applicable(*args):
                for x in args))


-def _lincomb(a, x1, b, x2, out, dtype):
+def _lincomb_impl(a, x1, b, x2, out, dtype):
    """Raw linear combination depending on data type."""

+    # Define thresholds for when different implementations should be used
+    threshold_small = 100
+    threshold_medium = 50000
+
+    # Convert to native since BLAS needs it
+    size = native(x1.size)
+
    # Shortcut for small problems
-    if x1.size < 100:  # small array optimization
+    if size <= threshold_small:  # small array optimization
        out.data[:] = a * x1.data + b * x2.data
        return

-    # Use blas for larger problems
-    def fallback_axpy(x1, x2, n, a):
-        """Fallback axpy implementation avoiding copy."""
-        if a != 0:
-            x2 /= a
-            x2 += x1
-            x2 *= a
-        return x2
-
-    def fallback_scal(a, x, n):
-        """Fallback scal implementation."""
-        x *= a
-        return x
-
-    def fallback_copy(x1, x2, n):
-        """Fallback copy implementation."""
-        x2[...] = x1[...]
-        return x2
-
-    if _blas_is_applicable(x1, x2, out):
+    # If data is very big, use BLAS if possible
+    if size > threshold_medium and _blas_is_applicable(x1, x2, out):
        axpy, scal, copy = linalg.blas.get_blas_funcs(
            ['axpy', 'scal', 'copy'], arrays=(x1.data, x2.data, out.data))
    else:
+        # Use fallbacks otherwise
+        def fallback_axpy(x1, x2, n, a):
+            """Fallback axpy implementation avoiding copy."""
+            if a != 0:
+                x2 /= a
+                x2 += x1
+                x2 *= a
+            return x2
+
+        def fallback_scal(a, x, n):
+            """Fallback scal implementation."""
+            x *= a
+            return x
+
+        def fallback_copy(x1, x2, n):
+            """Fallback copy implementation."""
+            x2[...] = x1[...]
+            return x2
+
        axpy, scal, copy = (fallback_axpy, fallback_scal, fallback_copy)

    if x1 is x2 and b != 0:
        # x1 is aligned with x2 -> out = (a+b)*x1
-        _lincomb(a + b, x1, 0, x1, out, dtype)
+        _lincomb_impl(a + b, x1, 0, x1, out, dtype)
    elif out is x1 and out is x2:
        # All the vectors are aligned -> out = (a+b)*out
-        scal(a + b, out.data, native(out.size))
+        scal(a + b, out.data, size)
    elif out is x1:
        # out is aligned with x1 -> out = a*out + b*x2
        if a != 1:
-            scal(a, out.data, native(out.size))
+            scal(a, out.data, size)
        if b != 0:
-            axpy(x2.data, out.data, native(out.size), b)
+            axpy(x2.data, out.data, size, b)
    elif out is x2:
        # out is aligned with x2 -> out = a*x1 + b*out
        if b != 1:
-            scal(b, out.data, native(out.size))
+            scal(b, out.data, size)
        if a != 0:
-            axpy(x1.data, out.data, native(out.size), a)
+            axpy(x1.data, out.data, size, a)
    else:
        # We have exhausted all alignment options, so x1 != x2 != out
        # We now optimize for various values of a and b
        if b == 0:
            if a == 0:  # Zero assignment -> out = 0
                out.data[:] = 0
            else:  # Scaled copy -> out = a*x1
-                copy(x1.data, out.data, native(out.size))
+                copy(x1.data, out.data, size)
                if a != 1:
-                    scal(a, out.data, native(out.size))
+                    scal(a, out.data, size)
        else:
            if a == 0:  # Scaled copy -> out = b*x2
-                copy(x2.data, out.data, native(out.size))
+                copy(x2.data, out.data, size)
                if b != 1:
-                    scal(b, out.data, native(out.size))
+                    scal(b, out.data, size)

            elif a == 1:  # No scaling in x1 -> out = x1 + b*x2
-                copy(x1.data, out.data, native(out.size))
-                axpy(x2.data, out.data, native(out.size), b)
+                copy(x1.data, out.data, size)
+                axpy(x2.data, out.data, size, b)
            else:  # Generic case -> out = a*x1 + b*x2
-                copy(x2.data, out.data, native(out.size))
+                copy(x2.data, out.data, size)
                if b != 1:
-                    scal(b, out.data, native(out.size))
-                axpy(x1.data, out.data, native(out.size), a)
+                    scal(b, out.data, size)
+                axpy(x1.data, out.data, size, a)


class NumpyFn(FnBase, NumpyNtuples):
@@ -811,7 +819,7 @@ def _lincomb(self, a, x1, b, x2, out):
        >>> out
        cn(3).element([(10-2j), (17-1j), (18.5+1.5j)])
        """
-        _lincomb(a, x1, b, x2, out, self.dtype)
+        _lincomb_impl(a, x1, b, x2, out, self.dtype)

    def _dist(self, x1, x2):
        """Calculate the distance between two vectors.
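
The copy-free ``fallback_axpy`` relies on the identity
``a * (x2 / a + x1) == x2 + a * x1`` for ``a != 0``, which matches the BLAS
``axpy`` semantics ``y <- a*x + y`` without allocating a temporary for
``a * x1``. A quick NumPy check of that identity on sample data::

    import numpy as np

    x1 = np.array([1.0, 2.0, 3.0])
    x2 = np.array([4.0, 5.0, 6.0])
    a = 2.5

    expected = x2 + a * x1  # reference axpy result

    # In-place variant used by fallback_axpy: no temporary array
    x2 /= a
    x2 += x1
    x2 *= a

    assert np.allclose(x2, expected)

The round trip through division can introduce a little rounding error, a
trade-off the fallback accepts in exchange for avoiding the copy; for large
arrays the BLAS routine is used where applicable.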
30 changes: 30 additions & 0 deletions odl/test/solvers/nonsmooth/douglas_rachford_test.py
@@ -95,6 +95,36 @@ def test_primal_dual_l1():
    assert all_almost_equal(x, data_1, places=2)


def test_primal_dual_no_operator():
    """Verify that the correct value is returned when there is no operator.

    Solves the optimization problem

        min_x ||x - data_1||_1

    which has optimum value data_1.
    """

    # Define the space
    space = odl.rn(5)

    # Operator
    L = []

    # Data
    data_1 = odl.util.testutils.noise_element(space)

    # Proximals
    f = odl.solvers.L1Norm(space).translated(data_1)
    g = []

    # Solve with f term dominating
    x = space.zero()
    douglas_rachford_pd(x, f, g, L, tau=3.0, sigma=[], niter=10)

    assert all_almost_equal(x, data_1, places=2)


def test_primal_dual_with_li():
"""Test for the forward-backward solver with infimal convolution.

9 changes: 6 additions & 3 deletions odl/trafos/util/ft_utils.py
@@ -386,7 +386,7 @@ def _interp_kernel_ft(norm_freqs, interp):
    if interp_ == 'nearest':
        pass
    elif interp_ == 'linear':
-        ker_ft **= 2
+        ker_ft *= ker_ft
    else:
        raise ValueError("`interp` '{}' not understood".format(interp))

@@ -536,10 +536,13 @@ def dft_postprocess_data(arr, real_grid, recip_grid, shift, axes,
        freqs = np.linspace(fmin, fmax, num=len_dft)
        stride = real_grid.stride[ax]

+        interp_kernel = _interp_kernel_ft(freqs, intp)
+        interp_kernel *= stride
+
        if op == 'multiply':
-            onedim_arr *= stride * _interp_kernel_ft(freqs, intp)
+            onedim_arr *= interp_kernel
        else:
-            onedim_arr /= stride * _interp_kernel_ft(freqs, intp)
+            onedim_arr /= interp_kernel

        onedim_arrs.append(onedim_arr.astype(out.dtype, copy=False))

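
In ``_interp_kernel_ft``, the Fourier-domain kernel for ``'linear'``
interpolation is the square of the ``'nearest'`` one, and ``ker_ft *= ker_ft``
performs that squaring with a cheap in-place multiplication rather than the
generic power operation of ``ker_ft **= 2``. In ``dft_postprocess_data``,
hoisting ``_interp_kernel_ft(freqs, intp)`` out of the ``if``/``else``
evaluates the kernel once per axis. A quick equivalence check for the in-place
square::

    import numpy as np

    ker = np.linspace(0.1, 1.0, 5)
    ref = ker ** 2  # reference: elementwise square, new array

    ker *= ker      # in-place square via multiplication
    assert np.allclose(ker, ref)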