ENH: Improve arg handling & enhance test suite for np.pad

numpy · Jan 7, 2015 · 29464a9 · 29464a9
1 parent 3ef77ee
commit 29464a9
Show file tree

Hide file tree

Showing 3 changed files with 580 additions and 65 deletions.
diff --git a/doc/release/1.10.0-notes.rst b/doc/release/1.10.0-notes.rst
@@ -73,6 +73,12 @@ given period and one end point is added to each extremity of *xp* in order to
 close the previous and the next period cycles, resulting in the correct
 interpolation behavior.
 
+*np.pad* supports more input types for ``pad_width`` and ``constant_values``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+``constant_values`` parameters now accepts NumPy arrays and float values.
+NumPy arrays are supported as input for ``pad_width``, and an exception is
+raised if its values are not of integral type.
+
 
 Changes
 =======

diff --git a/numpy/lib/arraypad.py b/numpy/lib/arraypad.py
@@ -6,7 +6,6 @@
 from __future__ import division, absolute_import, print_function
 
 import numpy as np
-from numpy.compat import long
 
 
 __all__ = ['pad']
@@ -47,7 +46,7 @@ def _arange_ndarray(arr, shape, axis, reverse=False):
 
     """
     initshape = tuple(1 if i != axis else shape[axis]
-                       for (i, x) in enumerate(arr.shape))
+                      for (i, x) in enumerate(arr.shape))
     if not reverse:
         padarr = np.arange(1, shape[axis] + 1)
     else:
@@ -779,7 +778,7 @@ def _pad_ref(arr, pad_amt, method, axis=-1):
     Notes
     -----
     This algorithm does not pad with repetition, i.e. the edges are not
-    repeated in the reflection. For that behavior, use `method='symmetric'`.
+    repeated in the reflection. For that behavior, use `mode='symmetric'`.
 
     The modes 'reflect', 'symmetric', and 'wrap' must be padded with a
     single function, lest the indexing tricks in non-integer multiples of the
@@ -864,7 +863,7 @@ def _pad_sym(arr, pad_amt, method, axis=-1):
     Notes
     -----
     This algorithm DOES pad with repetition, i.e. the edges are repeated.
-    For a method that does not repeat edges, use `method='reflect'`.
+    For padding without repeated edges, use `mode='reflect'`.
 
     The modes 'reflect', 'symmetric', and 'wrap' must be padded with a
     single function, lest the indexing tricks in non-integer multiples of the
@@ -987,7 +986,7 @@ def _pad_wrap(arr, pad_amt, axis=-1):
     return np.concatenate((wrap_chunk1, arr, wrap_chunk2), axis=axis)
 
 
-def _normalize_shape(narray, shape):
+def _normalize_shape(ndarray, shape, cast_to_int=True):
     """
     Private function which does some checks and normalizes the possibly
     much simpler representations of 'pad_width', 'stat_length',
@@ -997,52 +996,77 @@ def _normalize_shape(narray, shape):
     ----------
     narray : ndarray
         Input ndarray
-    shape : {sequence, int}, optional
-        The width of padding (pad_width) or the number of elements on the
-        edge of the narray used for statistics (stat_length).
+    shape : {sequence, array_like, float, int}, optional
+        The width of padding (pad_width), the number of elements on the
+        edge of the narray used for statistics (stat_length), the constant
+        value(s) to use when filling padded regions (constant_values), or the
+        endpoint target(s) for linear ramps (end_values).
         ((before_1, after_1), ... (before_N, after_N)) unique number of
         elements for each axis where `N` is rank of `narray`.
         ((before, after),) yields same before and after constants for each
         axis.
-        (constant,) or int is a shortcut for before = after = constant for
+        (constant,) or val is a shortcut for before = after = constant for
         all axes.
+    cast_to_int : bool, optional
+        Controls if values in ``shape`` will be rounded and cast to int
+        before being returned.
 
     Returns
     -------
-    _normalize_shape : tuple of tuples
-        int                               => ((int, int), (int, int), ...)
-        [[int1, int2], [int3, int4], ...] => ((int1, int2), (int3, int4), ...)
-        ((int1, int2), (int3, int4), ...) => no change
-        [[int1, int2], ]                  => ((int1, int2), (int1, int2), ...)
-        ((int1, int2), )                  => ((int1, int2), (int1, int2), ...)
-        [[int ,     ], ]                  => ((int, int), (int, int), ...)
-        ((int ,     ), )                  => ((int, int), (int, int), ...)
+    normalized_shape : tuple of tuples
+        val                               => ((val, val), (val, val), ...)
+        [[val1, val2], [val3, val4], ...] => ((val1, val2), (val3, val4), ...)
+        ((val1, val2), (val3, val4), ...) => no change
+        [[val1, val2], ]                  => ((val1, val2), (val1, val2), ...)
+        ((val1, val2), )                  => ((val1, val2), (val1, val2), ...)
+        [[val ,     ], ]                  => ((val, val), (val, val), ...)
+        ((val ,     ), )                  => ((val, val), (val, val), ...)
 
     """
-    normshp = None
-    shapelen = len(np.shape(narray))
-    if (isinstance(shape, int)) or shape is None:
-        normshp = ((shape, shape), ) * shapelen
-    elif (isinstance(shape, (tuple, list))
-            and isinstance(shape[0], (tuple, list))
-            and len(shape) == shapelen):
-        normshp = shape
-        for i in normshp:
-            if len(i) != 2:
-                fmt = "Unable to create correctly shaped tuple from %s"
-                raise ValueError(fmt % (normshp,))
-    elif (isinstance(shape, (tuple, list))
-            and isinstance(shape[0], (int, float, long))
-            and len(shape) == 1):
-        normshp = ((shape[0], shape[0]), ) * shapelen
-    elif (isinstance(shape, (tuple, list))
-            and isinstance(shape[0], (int, float, long))
-            and len(shape) == 2):
-        normshp = (shape, ) * shapelen
-    if normshp is None:
+    ndims = ndarray.ndim
+
+    # Shortcut shape=None
+    if shape is None:
+        return ((None, None), ) * ndims
+
+    # Convert any input `info` to a NumPy array
+    arr = np.asarray(shape)
+
+    # Switch based on what input looks like
+    if arr.ndim <= 1:
+        if arr.shape == () or arr.shape == (1,):
+            # Single scalar input
+            #   Create new array of ones, multiply by the scalar
+            arr = np.ones((ndims, 2), dtype=ndarray.dtype) * arr
+        elif arr.shape == (2,):
+            # Apply padding (before, after) each axis
+            #   Create new axis 0, repeat along it for every axis
+            arr = arr[np.newaxis, :].repeat(ndims, axis=0)
+        else:
+            fmt = "Unable to create correctly shaped tuple from %s"
+            raise ValueError(fmt % (shape,))
+
+    elif arr.ndim == 2:
+        if arr.shape[1] == 1 and arr.shape[0] == ndims:
+            # Padded before and after by the same amount
+            arr = arr.repeat(2, axis=1)
+        elif arr.shape[0] == ndims:
+            # Input correctly formatted, pass it on as `arr`
+            arr = shape
+        else:
+            fmt = "Unable to create correctly shaped tuple from %s"
+            raise ValueError(fmt % (shape,))
+
+    else:
         fmt = "Unable to create correctly shaped tuple from %s"
         raise ValueError(fmt % (shape,))
-    return normshp
+
+    # Cast if necessary
+    if cast_to_int is True:
+        arr = np.round(arr).astype(int)
+
+    # Convert list of lists to tuple of tuples
+    return tuple(tuple(axis) for axis in arr.tolist())
 
 
 def _validate_lengths(narray, number_elements):
@@ -1098,7 +1122,7 @@ def pad(array, pad_width, mode=None, **kwargs):
     ----------
     array : array_like of rank N
         Input array
-    pad_width : {sequence, int}
+    pad_width : {sequence, array_like, int}
         Number of values padded to the edges of each axis.
         ((before_1, after_1), ... (before_N, after_N)) unique pad widths
         for each axis.
@@ -1227,13 +1251,13 @@ def pad(array, pad_width, mode=None, **kwargs):
     Examples
     --------
     >>> a = [1, 2, 3, 4, 5]
-    >>> np.lib.pad(a, (2,3), 'constant', constant_values=(4,6))
+    >>> np.lib.pad(a, (2,3), 'constant', constant_values=(4, 6))
     array([4, 4, 1, 2, 3, 4, 5, 6, 6, 6])
 
-    >>> np.lib.pad(a, (2,3), 'edge')
+    >>> np.lib.pad(a, (2, 3), 'edge')
     array([1, 1, 1, 2, 3, 4, 5, 5, 5, 5])
 
-    >>> np.lib.pad(a, (2,3), 'linear_ramp', end_values=(5,-4))
+    >>> np.lib.pad(a, (2, 3), 'linear_ramp', end_values=(5, -4))
     array([ 5,  3,  1,  2,  3,  4,  5,  2, -1, -4])
 
     >>> np.lib.pad(a, (2,), 'maximum')
@@ -1245,7 +1269,7 @@ def pad(array, pad_width, mode=None, **kwargs):
     >>> np.lib.pad(a, (2,), 'median')
     array([3, 3, 1, 2, 3, 4, 5, 3, 3])
 
-    >>> a = [[1,2], [3,4]]
+    >>> a = [[1, 2], [3, 4]]
     >>> np.lib.pad(a, ((3, 2), (2, 3)), 'minimum')
     array([[1, 1, 1, 2, 1, 1, 1],
            [1, 1, 1, 2, 1, 1, 1],
@@ -1256,19 +1280,19 @@ def pad(array, pad_width, mode=None, **kwargs):
            [1, 1, 1, 2, 1, 1, 1]])
 
     >>> a = [1, 2, 3, 4, 5]
-    >>> np.lib.pad(a, (2,3), 'reflect')
+    >>> np.lib.pad(a, (2, 3), 'reflect')
     array([3, 2, 1, 2, 3, 4, 5, 4, 3, 2])
 
-    >>> np.lib.pad(a, (2,3), 'reflect', reflect_type='odd')
+    >>> np.lib.pad(a, (2, 3), 'reflect', reflect_type='odd')
     array([-1,  0,  1,  2,  3,  4,  5,  6,  7,  8])
 
-    >>> np.lib.pad(a, (2,3), 'symmetric')
+    >>> np.lib.pad(a, (2, 3), 'symmetric')
     array([2, 1, 1, 2, 3, 4, 5, 5, 4, 3])
 
-    >>> np.lib.pad(a, (2,3), 'symmetric', reflect_type='odd')
+    >>> np.lib.pad(a, (2, 3), 'symmetric', reflect_type='odd')
     array([0, 1, 1, 2, 3, 4, 5, 5, 6, 7])
 
-    >>> np.lib.pad(a, (2,3), 'wrap')
+    >>> np.lib.pad(a, (2, 3), 'wrap')
     array([4, 5, 1, 2, 3, 4, 5, 1, 2, 3])
 
     >>> def padwithtens(vector, pad_width, iaxis, kwargs):
@@ -1277,7 +1301,7 @@ def pad(array, pad_width, mode=None, **kwargs):
     ...     return vector
 
     >>> a = np.arange(6)
-    >>> a = a.reshape((2,3))
+    >>> a = a.reshape((2, 3))
 
     >>> np.lib.pad(a, 2, padwithtens)
     array([[10, 10, 10, 10, 10, 10, 10],
@@ -1287,6 +1311,8 @@ def pad(array, pad_width, mode=None, **kwargs):
            [10, 10, 10, 10, 10, 10, 10],
            [10, 10, 10, 10, 10, 10, 10]])
     """
+    if not np.asarray(pad_width).dtype.kind == 'i':
+        raise TypeError('`pad_width` must be of integral type.')
 
     narray = np.array(array)
     pad_width = _validate_lengths(narray, pad_width)
@@ -1327,7 +1353,8 @@ def pad(array, pad_width, mode=None, **kwargs):
             if i == 'stat_length':
                 kwargs[i] = _validate_lengths(narray, kwargs[i])
             if i in ['end_values', 'constant_values']:
-                kwargs[i] = _normalize_shape(narray, kwargs[i])
+                kwargs[i] = _normalize_shape(narray, kwargs[i],
+                                             cast_to_int=False)
     elif mode is None:
         raise ValueError('Keyword "mode" must be a function or one of %s.' %
                          (list(allowedkwargs.keys()),))
@@ -1420,18 +1447,13 @@ def pad(array, pad_width, mode=None, **kwargs):
             method = kwargs['reflect_type']
             safe_pad = newmat.shape[axis] - 1
             while ((pad_before > safe_pad) or (pad_after > safe_pad)):
-                offset = 0
                 pad_iter_b = min(safe_pad,
                                  safe_pad * (pad_before // safe_pad))
                 pad_iter_a = min(safe_pad, safe_pad * (pad_after // safe_pad))
                 newmat = _pad_ref(newmat, (pad_iter_b,
                                            pad_iter_a), method, axis)
                 pad_before -= pad_iter_b
                 pad_after -= pad_iter_a
-                if pad_iter_b > 0:
-                    offset += 1
-                if pad_iter_a > 0:
-                    offset += 1
                 safe_pad += pad_iter_b + pad_iter_a
             newmat = _pad_ref(newmat, (pad_before, pad_after), method, axis)