From e4b64c61f66dce9a96de05bbc1edb899a94aa7b9 Mon Sep 17 00:00:00 2001
From: mscaudill <mscaudill@gmail.com>
Date: Tue, 29 Aug 2023 09:57:54 -0500
Subject: [PATCH] [refactor]: move operatives to protools in core as they must
 prove themselves useful before placement in a public module (like tools) of
 openseize, passing pylint, mypy, doctest, codespell

---
 src/openseize/core/protools.py                | 127 ++++--
 src/openseize/tools/operatives.py             | 366 ------------------
 .../{test_operatives.py => test_protools.py}  |  14 +-
 3 files changed, 112 insertions(+), 395 deletions(-)
 delete mode 100644 src/openseize/tools/operatives.py
 rename tests/{test_operatives.py => test_protools.py} (92%)

diff --git a/src/openseize/core/protools.py b/src/openseize/core/protools.py
index ee6c95c..241e4c1 100644
--- a/src/openseize/core/protools.py
+++ b/src/openseize/core/protools.py
@@ -18,6 +18,7 @@
 
 from typing import Optional, Tuple, Union
 from functools import partial
+from itertools import zip_longest
 
 import numpy as np
 import numpy.typing as npt
@@ -189,16 +190,17 @@ def multiply_along_axis(pro: Producer,
                         arr: npt.NDArray,
                         axis: int,
 ) -> Producer:
-    """Multiplies each produced array of a producer by a 1-D array along a
-    single axis.
+    """Multiplies produced arrays by a 1-D array along a single axis.
 
     Args:
         pro:
             A producer of ndarrays to be multiplied along axis.
         arr:
-            A 1-D array whose length must match producers shape along axis.
+            A 1-D array whose length must match the producer's length along
+            axis.
         axis:
-            The axis along which to multiply.
+            The axis along which to multiply. This function supports
+            multiplication along any single axis including the production axis.
 
     Examples:
         >>> x = np.arange(10000).reshape(2, 4, 1250)
@@ -214,16 +216,11 @@ def multiply_along_axis(pro: Producer,
     """
 
     arr = np.array(arr)
-    
-    # FIXME I need to take care of when multiplication is along producing axis
-    if axis == pro.axis:
-        if len(arr) != pro.shape[pro.axis]:
-            msg = ('Multiplication along the production axis requires '
-                  'length of arr to match chunksize {} != {}.')
-            raise ValueError(msg.format(len(arr), pro.chunksize))
+    if arr.ndim > 1:
+        raise ValueError('Dimensions of multiplier arr must be exactly 1.')
 
     # ensure the arr shape matches the producers shape along axis
-    elif len(arr) != pro.shape[axis]:
+    if len(arr) != pro.shape[axis]:
         msg = 'operands could not be broadcast together with shapes {} {}'
         raise ValueError(msg.format(pro.shape, arr.shape))
 
@@ -231,29 +228,115 @@ def multiply_along_axis(pro: Producer,
     ndims = len(pro.shape)
     shape = np.ones(ndims, dtype=int)
     shape[axis] = len(arr)
-    y = arr.reshape(shape)
+    x = arr.reshape(shape) #type: Union[npt.NDArray, Producer]
+
+    # if multiplying along pro axis convert arr 'x' to producer
+    if axis == pro.axis:
+        x = producer(x, chunksize=pro.chunksize, axis=pro.axis)
 
-    func = partial(_multiply_gen, pro, y)
+    func = partial(_multiply_gen, pro, x)
     return producer(func, chunksize=pro.chunksize, axis=pro.axis,
                     shape=pro.shape)
 
 
-def _multiply_gen(pro, arr):
+def _multiply_gen(pro, multiplier):
     """A generating helper function that multiplies produced arrays by an
-    ndarray.
+    ndarray or producer of ndarrays.
 
     This helper function is a generating function (not a producer) and is not
-    intended to be called externally.
+    intended to be called externally. It assumes the multiplier's shape is
+    broadcastable to the producer's shape.
 
     Args:
         pro:
             A producer of ndarrays.
-        arr:
-            An ndarray of the same dims as each produced array.
+        multiplier:
+            An ndarray or a producer of ndarrays. It must have the same
+            number of dims as pro, with length 1 along every axis except
+            one; along that axis its length must equal the length of the
+            producer.
 
     Yields:
-        The element-wise product of each produced array with arr.
+        The element-wise product of each produced array with multiplier.
+    """
+
+    # non-production axis multiplication factors
+    factors = zip_longest(pro, multiplier, fillvalue=multiplier)
+
+    # production axis multiplication factors
+    if isinstance(multiplier, Producer):
+        factors = zip(pro, multiplier)
+
+    for arr, mult in factors:
+        yield arr * mult
+
+
+def slice_along_axis(pro: Producer,
+                     start: Optional[int] = None,
+                     stop: Optional[int] = None,
+                     step: Optional[int] = None,
+                     axis: int = -1,
+) -> Producer:
+    """Returns a producer producing values between start and stop in step
+    increments along axis.
+
+    Args:
+        pro:
+            A producer instance to slice along axis.
+        start:
+            The start index of the slice along axis. If None, slice will start
+            at 0.
+        stop:
+            The stop index of the slice along axis. If None, slice will extend
+            to the last element(s) of the producer along axis.
+        step:
+            The size of index steps between start and stop of slice.
+        axis:
+            The axis of the producer to be sliced.
+
+    Examples:
+        >>> x = np.random.random((4,10000))
+        >>> pro = producer(x, chunksize=1000, axis=-1)
+        >>> sliced_pro = slice_along_axis(pro, 100, 200)
+        >>> np.allclose(x[:,100:200], sliced_pro.to_array())
+        True
+
+    Returns:
+        A producer of ndarrays.
     """
 
-    for x in pro:
-        yield x * arr
+    # get start, stop, step indices for the slicing axis
+    start, stop, step  = slice(start, stop, step).indices(pro.shape[axis])
+
+    if axis == pro.axis:
+        # slicing along production axis is just masking
+        mask = np.zeros(pro.shape[axis], dtype=bool)
+        mask[start:stop:step] = True
+        return producer(pro, pro.chunksize, pro.axis, mask=mask)
+
+    # slicing along non-production axis changes shape of produced arrays
+    new_shape = list(pro.shape)
+    new_shape[axis] =  (stop - start) // step
+    func = partial(_slice_along_gen, pro, start, stop, step, axis)
+    return producer(func, pro.chunksize, pro.axis, shape=new_shape)
+
+
+def _slice_along_gen(pro, start, stop, step, axis):
+    """A generating helper function for slicing a producer along
+    a non-production axis between start and stop in step increments.
+
+    Args:
+        pro:
+            A producer instance to slice.
+        start:
+            The start index of the slice. May be None.
+        stop:
+            The stop index of the slice. May be None.
+        step:
+            The step size between start and stop to slice with. May be None.
+        axis:
+            The non-production axis along which to slice.
+    """
+
+    for arr in pro:
+        yield arraytools.slice_along_axis(arr, start, stop, step, axis=axis)
diff --git a/src/openseize/tools/operatives.py b/src/openseize/tools/operatives.py
deleted file mode 100644
index 4c40922..0000000
--- a/src/openseize/tools/operatives.py
+++ /dev/null
@@ -1,366 +0,0 @@
-"""A collection of tools to manipulate the size, shape or values produced by
-a producer including:
-
-    pad:
-        A function to pre and post pad a producer along a single axis.
-    expand_dims:
-        A function that expands a producers shape by axis insertion.
-    multiply_along_axis:
-        A function that multiplies produced values by a 1-D numpy array along
-        a single axis.
-    slice_along_axis:
-        A function that slices a producer along any axis.
-
-Note: To support concurrency all functions in this module are available at the
-module level. Functions not intended to be called externally are marked as
-protected with a single underscore.
-"""
-
-from typing import Optional, Tuple, Union
-from functools import partial
-from itertools import zip_longest
-
-import numpy as np
-import numpy.typing as npt
-from openseize import producer
-from openseize.core import arraytools
-from openseize.core.producer import Producer
-
-
-def pad(pro: Producer,
-        amt: Union[int, Tuple[int, int]],
-        axis: int,
-        value: Optional[float] = 0,
-) -> Producer:
-    """Pads the edges of a producer along single axis with a constant value.
-
-    Args:
-        pro:
-            A producer of ndarrays whose edges along axis are to be padded.
-        amt:
-            The number of pads to apply before the 0th element & after the
-            last element along axis. If int, amt number of values will be
-            prepended & appended to axis.
-        axis:
-            The axis of produced values along which to pad.
-        value: float
-            The constant value to pad the producer with. Defaults to zero.
-
-    Examples:
-        >>> x = np.arange(1000).reshape(4, 250)
-        >>> pro = producer(x, chunksize=100, axis=-1)
-        >>> padded_pro = pad(pro, [3, 10], axis=-1)
-        >>> padded_pro.shape
-        (4, 263)
-        >>> np.allclose(np.pad(x, [(0,0), (3, 10)]), padded_pro.to_array())
-        True
-
-    Returns:
-        A new producer padded with value along axis.
-
-    Notes:
-        This padding is less sophisticated than numpy as openseize only allows
-        constant pre and post padding. Future versions will likely improve this.
-    """
-
-    amts = (amt, amt) if isinstance(amt, int) else tuple(amt)
-
-    # dispatch to generating function based on whether pad is along pro.axis
-    if axis == pro.axis:
-        genfunc = _production_axis_padder
-    else:
-        genfunc = _other_axis_padder
-
-    # build a partial generating function and compute the return pros shape
-    func = partial(genfunc, pro, amts, axis, value)
-    new_shape = list(pro.shape)
-    new_shape[axis] = pro.shape[axis] + sum(amts)
-
-    return producer(func, pro.chunksize, pro.axis, shape=new_shape)
-
-
-def _production_axis_padder(pro, amt, axis, value):
-    """A generating function that pads a producer along its axis with value.
-
-    Padding a producer along its production axis only changes the first and last
-    produced arrays. For argument definitions see pad.
-    """
-
-    left_shape, right_shape = list(pro.shape), list(pro.shape)
-    left_shape[axis] = amt[0]
-    right_shape[axis] = amt[1]
-
-    # create the arrays to pad left and right along axis
-    left, right = [value * np.ones(s) for s in (left_shape, right_shape)]
-
-    yield left
-
-    for arr in pro:
-        yield arr
-
-    yield right
-
-
-def _other_axis_padder(pro, amt, axis, value):
-    """A generating func. that pads a producer along any non-production axis.
-
-    Padding a producer along a non-production axis changes the shape of all
-    produced arrays.
-    """
-
-    for arr in pro:
-        yield arraytools.pad_along_axis(arr, amt, axis, constant_values=value)
-
-
-def expand_dims(pro: Producer, axis: Union[int, Tuple] = 0) -> Producer:
-    """Expands a producer's shape by inserting a new axis at axis position.
-
-    Args:
-        producer:
-            A producer of ndarrays.
-        axis:
-            The position in the expanded axes where the axis or axes are placed.
-
-    Examples:
-        >>> data = np.random.random((102344,))
-        >>> pro = producer(data, chunksize=100, axis=-1)
-        >>> print(pro.shape)
-        (102344,)
-        >>> print(pro.axis)
-        -1
-        >>> expanded = expand_dims(pro, axis=(0, -1))
-        >>> print(expanded.shape)
-        (1, 102344, 1)
-        >>> # take note the producing axis changes too!
-        >>> print(expanded.axis)
-        1
-
-    Returns:
-        A new producer with expanded dimensions.
-
-    Notes:
-        In contrast with numpy's expand_dims, this function must expand the
-        produced array dims and track where the producing axis ends up. Callers
-        should be aware that inserting new axes into a producer may change the
-        production axis.
-    """
-
-    # normalize the axis to insert and the producer's axis
-    axes = (axis,) if isinstance(axis, int) else axis
-    pro_axis = arraytools.normalize_axis(pro.axis, len(pro.shape))
-
-    # calculate out ndims, initialize new shape and normalize inserts
-    new_ndim = len(pro.shape) + len(axes)
-    new_shape = np.ones(new_ndim, dtype=int)
-    inserts = [arraytools.normalize_axis(ax, new_ndim) for ax in axes]
-
-    # find indices of new_shape where we will insert producer's shape
-    complements = sorted(set(range(new_ndim)).difference(inserts))
-
-    # set the new axis and insert producer's shape into new shape
-    new_axis = complements[pro_axis]
-
-    for idx, comp in enumerate(complements):
-
-        new_shape[comp] = pro.shape[idx]
-
-    func = partial(_expand_gen, pro, axes)
-    return producer(func, pro.chunksize, new_axis, tuple(new_shape))
-
-
-def _expand_gen(pro, axes):
-    """A generating function that expands the dims of each produced array
-    in a producer.
-
-    Args:
-        pro:
-            A producer of ndarrays.
-        axes:
-            A tuple of axes to insert.
-
-    Yields:
-        Arrays with expanded dims.
-    """
-
-    for arr in pro:
-        yield np.expand_dims(arr, axes)
-
-
-def multiply_along_axis(pro: Producer,
-                        arr: npt.NDArray,
-                        axis: int,
-) -> Producer:
-    """Multiplies produced arrays by a 1-D array along a single axis.
-
-    Args:
-        pro:
-            A producer of ndarrays to be multiplied along axis.
-        arr:
-            A 1-D array whose length must match producers length along a single
-            axis.
-        axis:
-            The axis along which to multiply. This function supports
-            multiplication along any single axis including the production axis.
-
-    Examples:
-        >>> x = np.arange(10000).reshape(2, 4, 1250)
-        >>> pro = producer(x, chunksize=100, axis=-1)
-        >>> arr = np.array([0, -1, 1, 0]) #1D array to multiply by
-        >>> multiplied = multiply_along_axis(pro, arr, axis=1)
-        >>> y = multiplied.to_array()
-        >>> np.allclose(x * arr.reshape(1, 4, 1), y)
-        True
-
-    Returns:
-        A new producer of arrays the same shape as the input producer.
-    """
-
-    arr = np.array(arr)
-    if arr.ndim > 1:
-        raise ValueError('Dimensions of multiplier arr must be exactly 1.')
- 
-    # ensure the arr shape matches the producers shape along axis
-    if len(arr) != pro.shape[axis]:
-        msg = 'operands could not be broadcast together with shapes {} {}'
-        raise ValueError(msg.format(pro.shape, arr.shape))
-
-    # reshape the input array to be broadcastable with produced arrays
-    ndims = len(pro.shape)
-    shape = np.ones(ndims, dtype=int)
-    shape[axis] = len(arr)
-    x = arr.reshape(shape)
-
-    # if multiplying along pro axis convert arr 'x' to producer
-    if axis == pro.axis:
-        x = producer(x, chunksize=pro.chunksize, axis=pro.axis)
-
-    func = partial(_multiply_gen, pro, x)
-    return producer(func, chunksize=pro.chunksize, axis=pro.axis,
-                    shape=pro.shape)
-
-
-def _multiply_gen(pro, multiplier):
-    """A generating helper function that multiplies produced arrays by an
-    ndarray or producer of ndarrays.
-
-    This helper function is a generating function (not a producer) and is not
-    intended to be called externally. It assumes that multipliers shape is
-    broadcastable to producers shape.
-
-    Args:
-        pro:
-            A producer of ndarrays.
-        multiplier:
-            An ndarray or a producer of ndarrays. The number of dims of this
-            object must match the dims of pro and have shape of 1 along all axes
-            except 1 axis whose length must equal the length of the producer
-            along this axis.
-
-    Yields:
-        The element-wise product of each produced array with multiplier.
-    """
-
-    # non-production axis multiplication factors
-    factors = zip_longest(pro, multiplier, fillvalue=multiplier)
-
-    # production axis multiplication factors
-    if isinstance(multiplier, Producer):
-        factors = zip(pro, multiplier)
-
-    for arr, mult in factors:
-        yield arr * mult
-
-
-def slice_along_axis(pro: Producer,
-                     start: Optional[int] = None,
-                     stop: Optional[int] = None,
-                     step: Optional[int] = None,
-                     axis: int = -1,
-) -> Producer:
-    """Returns a producer producing values between start and stop in step
-    increments along axis.
-
-    Args:
-        pro:
-            A producer instance to slice along axis.
-        start:
-            The start index of the slice along axis. If None, slice will start
-            at 0.
-        stop:
-            The stop index of the slice along axis. If None slice will extend to
-            last element(s) of producer along axis.
-        step:
-            The size of index steps between start and stop of slice.
-        axis:
-            The axis of the producer to be sliced.
-        
-    Examples:
-        >>> x = np.random.random((4,10000))
-        >>> pro = producer(x, chunksize=1000, axis=-1)
-        >>> sliced_pro = slice_along_axis(pro, 100, 200)
-        >>> np.allclose(x[:,100:200), sliced_pro.to_array())
-        True
-
-    Returns:
-        A producer of ndarrays.
-    """
-
-    # get start, stop, step indices for the slicing axis
-    start, stop, step  = slice(start, stop, step).indices(pro.shape[axis])
-
-    if axis == pro.axis:
-        # slicing along production axis is just masking
-        mask = np.zeros(pro.shape[axis])
-        mask[start:stop:step] = True
-        return producer(pro, pro.chunksize, pro.axis, mask=mask)
-
-    else:
-        # slicing along non-production axis changes shape of produced arrays
-        new_shape = list(pro.shape)
-        new_shape[axis] =  (stop - start) // step 
-        func = partial(_slice_along_gen, pro, start, stop, step, axis)
-        return producer(func, pro.chunksize, pro.axis, shape=new_shape) 
-
-
-def _slice_along_gen(pro, start, stop, step, axis):
-    """A generating helper function for slicing a producer along
-    a non-production axis between start and stop in step increments.
-
-    Args:
-        pro:
-            A producer instance to slice.
-        start:
-            The start index of the slice. May be None.
-        stop:
-            The stop index of the slice. May be None.
-        step:
-            The step size between start and stop to slice with. May be None.
-        axis:
-            The non-production axis along which to slice.
-    """
-
-    for arr in pro:
-        yield arraytools.slice_along_axis(arr, start, stop, step, axis=axis)
-
-
-if __name__ == '__main__':
-
-    x = np.random.random((4,10000))
-    pro = producer(x, chunksize=1000, axis=-1)
-
-    """
-    y = 10 * np.ones(4)
-    mpro = multiply_along_axis(pro, y, axis=0)
-
-    print(np.allclose(mpro.to_array(), x * y.reshape(4,1)))
-
-    z = 13.77 * np.ones(10000)
-    mpro2 = multiply_along_axis(pro, z, axis=-1)
-    print(np.allclose(mpro2.to_array(), x * z.reshape(1, 10000)))
-    """
-
-    sliced = slice_along_axis(pro, start=100, stop=300, axis=-1)
-    print(np.allclose(sliced.to_array(), x[:, 100:300]))
-
-    sliced2 = slice_along_axis(pro, start=1, step=2, axis=0)
-    print(np.allclose(sliced2.to_array(), x[1::2,:]))
diff --git a/tests/test_operatives.py b/tests/test_protools.py
similarity index 92%
rename from tests/test_operatives.py
rename to tests/test_protools.py
index 12cd86b..3c5634c 100644
--- a/tests/test_operatives.py
+++ b/tests/test_protools.py
@@ -1,4 +1,4 @@
-"""A module for testing ops that manipulate the size, shape and values
+"""A module for testing protools that manipulate the size, shape and values
 produced by a producer.
 
 Typical usage example:
@@ -11,7 +11,7 @@
 from pytest_lazyfixture import lazy_fixture
 
 from openseize import producer
-from openseize.tools import operatives as ops
+from openseize.core import protools
 
 
 @pytest.fixture(scope='module')
@@ -69,7 +69,7 @@ def test_production_pad(arr):
 
     # pad the producer
     amt = (10, 752)
-    padded_pro = ops.pad(pro, amt=amt, axis=pro_axis)
+    padded_pro = protools.pad(pro, amt=amt, axis=pro_axis)
     
     # build a producer from the padded array to compare against
     pads = [(0,0) for _ in range(arr.ndim)]
@@ -102,7 +102,7 @@ def test_nonproduction_pad(arr):
     amt = (190, 13)
 
     # create padded_producers
-    padded_pros = [ops.pad(pro, amt=amt, axis=ax) for ax in padding_axes]
+    padded_pros = [protools.pad(pro, amt=amt, axis=ax) for ax in padding_axes]
 
     # create ground truth padded array producers
     ground_truth_pros = []
@@ -137,7 +137,7 @@ def test_expand_dims(arr):
     pro = producer(arr, chunksize=1000, axis=axis)
     
     for insertion in range(arr.ndim):
-        expanded = ops.expand_dims(pro, axis=insertion)
+        expanded = protools.expand_dims(pro, axis=insertion)
         
         for x, y in zip(pro, expanded):
             
@@ -162,7 +162,7 @@ def test_multiply_along_axis(arr):
     
     multiplier = 4.3 * np.ones(pro.shape[0])
     # call multiply along 0th axis for each transposed arr
-    result = ops.multiply_along_axis(pro, multiplier, axis=0).to_array()
+    result = protools.multiply_along_axis(pro, multiplier, axis=0).to_array()
 
     #broadcast multiplier for multiplication along 0th axis
     shape = np.ones(arr.ndim, dtype=int)
@@ -196,6 +196,6 @@ def test_slice_along_axis(arr):
         start, stop, step = 0, 2, None
 
     # slice and convert to array
-    result = ops.slice_along_axis(pro, start, stop, step, axis=0).to_array()
+    result = protools.slice_along_axis(pro, start, stop, step, axis=0).to_array()
 
     assert np.allclose(arr[start:stop:step], result)