Skip to content

ufunc and binop overrides and behavior suggestions

Marten van Kerkwijk edited this page May 29, 2015 · 31 revisions

Synopsis

This page lists several suggestions on how __numpy_ufunc__, __array_priority__, ndarray subclassing, and Python binary operations should work together in the future.

The discussion is ongoing in gh-5844. Because code sometimes speaks louder than words, and because it is clumsy to revise an evolving suggestion in a GitHub discussion thread, the latest suggestions are kept here.

Let us assign a name and a revision number to each suggestion. Increment the revision number on each edit.

ufunc.2

class MyNdarray(object):
    """Example array-like whose binary operators are implemented via ufuncs.

    Operands are accepted when they are instances of a known class, or when
    they do not define ``__numpy_ufunc__`` themselves; otherwise the ufunc
    handlers return ``NotImplemented``.
    """

    # Classes (in addition to MyNdarray itself) that are always accepted.
    _known_classes = (np.ndarray,)

    def _can_handle(self, other):
        """Return True if `other` may take part in an operation handled here."""
        # `_known_classes` is a class attribute, so it must be reached via self.
        if isinstance(other, self._known_classes + (MyNdarray,)):
            return True
        # Unknown objects that define __numpy_ufunc__ are not handled here.
        if hasattr(other, "__numpy_ufunc__"):
            return False
        return True

    def __add__(self, other):
        return np.add(self, other)

    def __radd__(self, other):
        return np.add(other, self)

    def __iadd__(self, other):
        return np.add(self, other, out=self)

    # ... repeat for other binops ...

    def __numpy_ufunc__(self, ufunc, method, args, kwargs):
        """Dispatch on the ufunc `method` name.

        `args` is a sequence of positional arguments and `kwargs` a dict of
        keyword arguments; both are unpacked into the per-method handler.

        Raises NotImplementedError for any method other than "__call__"
        and "reduce".
        """
        if method == "__call__":
            return self._numpy_ufunc_call(ufunc, *args, **kwargs)
        elif method == "reduce":
            return self._numpy_ufunc_reduce(ufunc, *args, **kwargs)
        else:
            raise NotImplementedError("it's just an example ok")

    def _numpy_ufunc_call(self, ufunc, *args, out=None, **kwargs):
        """Handle ``ufunc(*args, out=out, **kwargs)``.

        Every input and output is passed through `_can_handle`; the first one
        that is rejected makes this return NotImplemented.  Accepted values
        are converted with `np.asarray` before the ufunc is called.
        """
        new_args = []
        for arg in args:
            if not self._can_handle(arg):
                return NotImplemented
            new_args.append(np.asarray(arg))
        args = new_args
        if out is not None:
            # Normalize a single output to a 1-tuple.
            if not isinstance(out, tuple):
                out = (out,)
            new_out = []
            for arg in out:
                if not self._can_handle(arg):
                    return NotImplemented
                new_out.append(np.asarray(arg))
            out = tuple(new_out)
        return ufunc(*args, out=out, **kwargs)

    def _numpy_ufunc_reduce(self, ufunc, a, out=None, **kwargs):
        """Handle ``ufunc.reduce(a, out=out, **kwargs)``.

        `ufunc` is the first positional argument because `__numpy_ufunc__`
        passes it that way.  Returns NotImplemented if `a` or `out` is
        rejected by `_can_handle`.
        """
        if not self._can_handle(a):
            return NotImplemented
        a = np.asarray(a)
        if out is not None:
            if not self._can_handle(out):
                return NotImplemented
            out = np.asarray(out)
        return ufunc.reduce(a, out=out, **kwargs)

opt-out.3

class MyNdarray(object):
    """Example array-like using an explicit binop opt-out flag.

    Binary operators go through this class's own `__numpy_ufunc__`, so an
    operand that sets ``__numpy_binop_override__`` makes the operator return
    ``NotImplemented``.
    """

    __numpy_binop_override__ = True

    # Classes (in addition to MyNdarray itself) that are always accepted.
    _known_classes = (np.ndarray,)

    def _can_handle(self, other):
        """Return True if `other` may take part in an operation handled here."""
        if isinstance(other, self._known_classes + (MyNdarray,)):
            return True
        if getattr(other, "__numpy_binop_override__", False):
            return False
        # potential addition/alternative: consider presence of
        # __numpy_ufunc__ as an opt-out
        return True

    def __add__(self, other):
        # The ufunc is `np.add`; this class has no `add` attribute of its own.
        return self.__numpy_ufunc__(np.add, "__call__", (self, other), {})

    def __radd__(self, other):
        return self.__numpy_ufunc__(np.add, "__call__", (other, self), {})

    def __iadd__(self, other):
        # IF we want to never give `other` the opportunity to do an out-of-place
        # operation:
        return np.add(self, other, out=self)
        # OTHERWISE, replace the line above with (note that keyword arguments
        # travel in the `kwargs` dict, not as keywords):
        #     return self.__numpy_ufunc__(
        #         np.add, "__call__", (self, other), {"out": self})

    # ... repeat for every binop ...

    # Implementation of __numpy_ufunc__ is the same as in ufunc.2 above

    def __numpy_ufunc__(self, ufunc, method, args, kwargs):
        """Dispatch on the ufunc `method` name.

        `args` is a sequence of positional arguments and `kwargs` a dict of
        keyword arguments; both are unpacked into the per-method handler.

        Raises NotImplementedError for any method other than "__call__"
        and "reduce".
        """
        if method == "__call__":
            return self._numpy_ufunc_call(ufunc, *args, **kwargs)
        elif method == "reduce":
            return self._numpy_ufunc_reduce(ufunc, *args, **kwargs)
        else:
            raise NotImplementedError("it's just an example ok")

    def _numpy_ufunc_call(self, ufunc, *args, out=None, **kwargs):
        """Handle ``ufunc(*args, out=out, **kwargs)``.

        Returns NotImplemented as soon as an input or output is rejected by
        `_can_handle`; accepted values are converted with `np.asarray`.
        """
        new_args = []
        for arg in args:
            if not self._can_handle(arg):
                return NotImplemented
            new_args.append(np.asarray(arg))
        args = new_args
        if out is not None:
            # Normalize a single output to a 1-tuple.
            if not isinstance(out, tuple):
                out = (out,)
            new_out = []
            for arg in out:
                if not self._can_handle(arg):
                    return NotImplemented
                new_out.append(np.asarray(arg))
            out = tuple(new_out)
        return ufunc(*args, out=out, **kwargs)

    def _numpy_ufunc_reduce(self, ufunc, a, out=None, **kwargs):
        """Handle ``ufunc.reduce(a, out=out, **kwargs)``.

        `ufunc` is the first positional argument because `__numpy_ufunc__`
        passes it that way.  Returns NotImplemented if `a` or `out` is
        rejected by `_can_handle`.
        """
        if not self._can_handle(a):
            return NotImplemented
        a = np.asarray(a)
        if out is not None:
            if not self._can_handle(out):
                return NotImplemented
            out = np.asarray(out)
        return ufunc.reduce(a, out=out, **kwargs)

treat-as-ndarray-subclass.2

Here, I first try to define how ndarray itself behaves, and then give an example of a container subclass as well as a different array-like (the latter is nearly identical to opt-out above).

(update 2: correct MaskedArray)

class ndarray():
    """Sketch of how `ndarray` itself could behave under this proposal."""

    def _can_handle(self, other):
        """Return True if this class should handle `other` itself."""
        # we're a subclass (or the same) of other's class, so of course we
        # can handle it.  isinstance() needs a class, hence type(other).
        if isinstance(self, type(other)):
            return True
        # Strict subclasses of us and objects defining `__numpy_ufunc__` get treated identically.
        if isinstance(other, type(self)) or hasattr(other, '__numpy_ufunc__'):
            return False
        # backwards compatibility
        if getattr(other, '__array_priority__', 0) > 0:
            return False
        return True

    def _can_handle_as_output(self, other):
        """Return True if `other` is an instance of our class that we can handle."""
        return isinstance(other, type(self)) and self._can_handle(other)

    # Keyword arguments are passed to __numpy_ufunc__ as real keywords, so no
    # trailing `{}` positional argument is given here.
    def __add__(self, other):
        return self.__numpy_ufunc__(np.add, "__call__", 0, (self, other))

    def __radd__(self, other):
        return self.__numpy_ufunc__(np.add, "__call__", 1, (other, self))

    def __iadd__(self, other):
        return self.__numpy_ufunc__(np.add, "__call__", 0, (self, other), out=self)

    # ... repeat for every binop ...
    def __numpy_ufunc__(self, ufunc, method, i, args, **kwargs):
        """Run `ufunc`'s `method` on `args` after converting them with `np.asarray`.

        `i` is accepted but not used by this implementation.  Returns
        NotImplemented if any input fails `_can_handle` or any output fails
        `_can_handle_as_output`.
        """
        new_args = []
        for arg in args:
            if not self._can_handle(arg):
                return NotImplemented
            new_args.append(np.asarray(arg))
        args = new_args
        # Bind the popped value: it is needed below, not just tested.
        out = kwargs.pop('out', None)
        if out is not None:
            if not isinstance(out, tuple):
                out = (out,)
            new_out = []
            for arg in out:
                if not self._can_handle_as_output(arg):
                    return NotImplemented
                new_out.append(np.asarray(arg))
            kwargs['out'] = tuple(new_out)
        # All arguments are now guaranteed to be ndarray, so could call into
        # function beyond any checks for `__numpy_ufunc__`, etc.
        kwargs['ndarray_only'] = True
        # Note that we are guaranteed to work here, so we do not have to worry
        # about catching possible TypeError
        return getattr(ufunc, method)(*args, **kwargs)


class ufunc():
    """Sketch of ufunc dispatch through `__numpy_ufunc__`."""

    def __init__(self, ndarray_only_routine):
        # the ndarray_only_routine is the basic code that can only handle ndarray, i.e.,
        # does not do any coercing any more.
        self.ndarray_only_routine = ndarray_only_routine

    def __call__(self, *args, **kwargs):
        return self.execute('__call__', *args, **kwargs)

    def execute(self, method, *args, **kwargs):
        """Run `method` of this ufunc, giving each argument a chance to override.

        With ``ndarray_only=True`` the ndarray-only routine is called
        directly.  Otherwise each input/output that defines
        `__numpy_ufunc__` is tried in order and the first result that is not
        NotImplemented is returned.  If no argument defined
        `__numpy_ufunc__`, `ndarray.__numpy_ufunc__` is tried.

        Raises TypeError if nothing handled the operation.
        """
        # TODO: some code that ensures that args only contains inputs and
        # kwargs everything else (left out of this sketch).
        if kwargs.pop('ndarray_only', False):
            return self.ndarray_only_routine(method, *args, **kwargs)

        # we may have non-ndarray instances; try __numpy_ufunc__ until something succeeds
        out = kwargs.get('out', ())
        if not isinstance(out, tuple):
            out = (out,)

        result = None
        for i, arg in enumerate(args + out):
            if hasattr(arg, '__numpy_ufunc__'):
                result = arg.__numpy_ufunc__(self, method, i, args, **kwargs)
                if result is not NotImplemented:
                    return result

        if result is None:
            # None of the arguments were array-like (e.g., np.add(1., 1.); so nothing has been tried
            # yet.  Use ndarray to see if the arguments can be converted.
            # NOTE(review): the method needs an instance for `self`, plus the
            # ufunc and the inputs as one tuple; a throwaway `ndarray()` is
            # used here for the sake of the sketch -- confirm the intent.
            result = ndarray().__numpy_ufunc__(self, method, len(args), args, **kwargs)
            if result is not NotImplemented:
                return result

        raise TypeError
         

# Build the `add` ufunc of this sketch from its ndarray-only routine
# (`ndarray_only_add` itself is not defined on this page).
add = ufunc(ndarray_only_add)


class MaskedArray(ndarray):
    # A container class that defers everything except masks to ndarray
    def __numpy_ufunc__(self, ufunc, method, i, args, **kwargs):
        """Apply `ufunc` to the unmasked data and attach a combined mask.

        Inputs that have a `mask` attribute are replaced by their `data`;
        outputs of exactly our own type are replaced by their `data` too.
        Returns NotImplemented if the ufunc raises TypeError on the
        unwrapped arguments.
        """
        new_args = []
        masks = []
        for arg in args:
            if hasattr(arg, 'mask'):  # Duck-typing
                masks.append(arg.mask)
                new_args.append(arg.data)
            else:
                masks.append(None)
                new_args.append(arg)
        args = new_args

        # Bind the popped value: it is needed below, not just tested.
        out = kwargs.pop('out', None)
        first_out = None
        if out is not None:
            if not isinstance(out, tuple):
                out = (out,)
            # Remember the caller's first output so it can be returned as-is.
            first_out = out[0] if out else None
            # Test each output (`arg`), not the enclosing tuple.
            kwargs['out'] = tuple(arg.data if type(arg) is type(self) else arg
                                  for arg in out)
        # All arguments are now guaranteed not to be type(self) any more, so we call
        # the ufunc to deal with possible other types (e.g., if self.data defined
        # __numpy_ufunc__, or one of the other arguments had one). If this fails,
        # we return NotImplemented, since possibly the other argument can handle
        # MaskedArray even though it cannot handle our content (seems unlikely, but
        # let's be proper).
        try:
            result = getattr(ufunc, method)(*args, **kwargs)
        except TypeError:
            return NotImplemented

        # Alternatively (possibly more logical), we just let our content try,
        # i.e. replace the try/except above with:
        #     result = self.data.__numpy_ufunc__(ufunc, method, i, args, **kwargs)
        #     if result is NotImplemented:
        #         return NotImplemented

        # ignore multiple outputs here for this example's sake.
        if type(first_out) is type(self):
            # The result was written into the caller's masked output; return that.
            masked = first_out
        else:
            masked = result.view(type(self))
        masked.mask = self.combine_masks(masks)
        return masked

class MyNdarray(object):
    """Example non-container array-like for the treat-as-ndarray-subclass proposal.

    Operands are accepted when they are instances of a known class or of this
    object's own class, or when they do not define ``__numpy_ufunc__``.
    """

    # Classes (in addition to our own class) that are always accepted.
    _known_classes = (np.ndarray,)

    def _can_handle(self, other):
        """Return True if `other` may take part in an operation handled here."""
        if isinstance(other, self._known_classes + (self.__class__,)):
            return True
        if hasattr(other, "__numpy_ufunc__"):
            return False
        return True

    # These methods are defined just like for `ndarray`
    def __add__(self, other):
        return self.__numpy_ufunc__(np.add, "__call__", (self, other), {})

    def __radd__(self, other):
        return self.__numpy_ufunc__(np.add, "__call__", (other, self), {})

    def __iadd__(self, other):
        # Keyword arguments travel in the `kwargs` dict of __numpy_ufunc__.
        return self.__numpy_ufunc__(np.add, "__call__", (self, other), {"out": self})

    # ... repeat for every binop ...

    # Implementation of __numpy_ufunc__ is the same as in ufunc.2 above.  This non-container
    # class insists it has to know how to deal with other classes rather than change itself into
    # an ndarray and let other classes try.
    def __numpy_ufunc__(self, ufunc, method, args, kwargs):
        """Dispatch on the ufunc `method` name.

        `args` is a sequence of positional arguments and `kwargs` a dict of
        keyword arguments; both are unpacked into the per-method handler.

        Raises NotImplementedError for any method other than "__call__"
        and "reduce".
        """
        if method == "__call__":
            return self._numpy_ufunc_call(ufunc, *args, **kwargs)
        elif method == "reduce":
            return self._numpy_ufunc_reduce(ufunc, *args, **kwargs)
        else:
            raise NotImplementedError("it's just an example ok")

    def _numpy_ufunc_call(self, ufunc, *args, out=None, **kwargs):
        """Handle ``ufunc(*args, out=out, **kwargs)``.

        Returns NotImplemented as soon as an input or output is rejected by
        `_can_handle`; accepted values are converted with `np.asarray`.
        """
        new_args = []
        for arg in args:
            if not self._can_handle(arg):
                return NotImplemented
            new_args.append(np.asarray(arg))
        args = new_args
        if out is not None:
            # Normalize a single output to a 1-tuple.
            if not isinstance(out, tuple):
                out = (out,)
            new_out = []
            for arg in out:
                if not self._can_handle(arg):
                    return NotImplemented
                new_out.append(np.asarray(arg))
            out = tuple(new_out)

        result = ufunc(*args, out=out, **kwargs)
        # TODO: possible stuff to turn result into type(self)
        return result

    def _numpy_ufunc_reduce(self, ufunc, a, out=None, **kwargs):
        """Handle ``ufunc.reduce(a, out=out, **kwargs)``.

        `ufunc` is the first positional argument because `__numpy_ufunc__`
        passes it that way.  Returns NotImplemented if `a` or `out` is
        rejected by `_can_handle`.
        """
        if not self._can_handle(a):
            return NotImplemented
        a = np.asarray(a)
        if out is not None:
            if not self._can_handle(out):
                return NotImplemented
            out = np.asarray(out)
        return ufunc.reduce(a, out=out, **kwargs)

ufunc-wrapper.1

class UfuncWrapper:
    """Example wrapper that implements binary operators by calling ufuncs."""

    def __init__(self, values):
        # values should be an array-like object
        self.values = values

    def __numpy_ufunc__(self, ufunc, method, i, inputs, out=None, **kwargs):
        """Re-run the ufunc with `self` unwrapped and wrap the result.

        `out` may be a single output or a tuple of outputs; when given, it is
        unwrapped the same way as the inputs and forwarded to the ufunc.
        `i` is accepted but not used.
        """
        # replace self with self.values before calling the ufunc again
        inputs = tuple(x.values if x is self else x for x in inputs)
        if out is not None:
            if not isinstance(out, tuple):
                out = (out,)
            # Forward the unwrapped outputs; only pass `out` when it was given.
            kwargs['out'] = tuple(x.values if x is self else x for x in out)
        # do the computation on unwrapped arrays
        result = getattr(ufunc, method)(*inputs, **kwargs)
        # now wrap the result
        return type(self)(result)

    # binary ops are defined by calling ufuncs
    def __add__(self, other):
        return np.add(self, other)

    def __radd__(self, other):
        return np.add(other, self)

    def __iadd__(self, other):
        return np.add(self, other, out=self)

    # repeat for all binary ops... (could use a standard mixin)

    # repeat for all binary ops... (could use a standard mixin)

opt-out-wrapper.1

class OptOutWrapper:
    """Example wrapper whose binary operators act on the wrapped values directly."""

    # __init__ and __numpy_ufunc__ are defined as on UfuncWrapper

    # add any necessary opt-out flags

    def __init__(self, values):
        # values should be an array-like object
        self.values = values

    def __numpy_ufunc__(self, ufunc, method, i, inputs, out=None, **kwargs):
        """Call the ufunc method with `self` unwrapped, then wrap the result."""

        def unwrap(item):
            # Only this very instance is replaced by its wrapped values.
            return item.values if item is self else item

        unwrapped_inputs = tuple(map(unwrap, inputs))
        if out is not None:
            outputs = out if isinstance(out, tuple) else (out,)
            out = tuple(map(unwrap, outputs))
        # The computation runs on the unwrapped arrays; the result is re-wrapped.
        bound_method = getattr(ufunc, method)
        return type(self)(bound_method(*unwrapped_inputs, out=out, **kwargs))

    def _binary_op(self, op, other, reflexive=False):
        """Apply `op` to the wrapped values and `other`, wrapping the result.

        With `reflexive` set, `other` is the left operand.
        """
        # A dedicated wrapper is needed for binary operations so that they are
        # properly handed off to the wrapped object.
        operands = (other, self.values) if reflexive else (self.values, other)
        return type(self)(op(*operands))

    # binary ops use special _binary_op wrapper
    def __add__(self, other):
        return self._binary_op(operator.add, other)

    def __iadd__(self, other):
        return self._binary_op(operator.iadd, other)

    def __radd__(self, other):
        return self._binary_op(operator.add, other, reflexive=True)

    # repeat for all binary ops...

    # repeat for all binary ops...