From e4b64c61f66dce9a96de05bbc1edb899a94aa7b9 Mon Sep 17 00:00:00 2001 From: mscaudill Date: Tue, 29 Aug 2023 09:57:54 -0500 Subject: [PATCH] [refactor]: move operatives to protools in core as they must prove themselves useful before placement in a public module (like tools) of openseize, passing pylint, mypy, doctest, codespell --- src/openseize/core/protools.py | 127 ++++-- src/openseize/tools/operatives.py | 366 ------------------ .../{test_operatives.py => test_protools.py} | 14 +- 3 files changed, 112 insertions(+), 395 deletions(-) delete mode 100644 src/openseize/tools/operatives.py rename tests/{test_operatives.py => test_protools.py} (92%) diff --git a/src/openseize/core/protools.py b/src/openseize/core/protools.py index ee6c95c..241e4c1 100644 --- a/src/openseize/core/protools.py +++ b/src/openseize/core/protools.py @@ -18,6 +18,7 @@ from typing import Optional, Tuple, Union from functools import partial +from itertools import zip_longest import numpy as np import numpy.typing as npt @@ -189,16 +190,17 @@ def multiply_along_axis(pro: Producer, arr: npt.NDArray, axis: int, ) -> Producer: - """Multiplies each produced array of a producer by a 1-D array along a - single axis. + """Multiplies produced arrays by a 1-D array along a single axis. Args: pro: A producer of ndarrays to be multiplied along axis. arr: - A 1-D array whose length must match producers shape along axis. + A 1-D array whose length must match producers length along a single + axis. axis: - The axis along which to multiply. + The axis along which to multiply. This function supports + multiplication along any single axis including the production axis. Examples: >>> x = np.arange(10000).reshape(2, 4, 1250) @@ -214,16 +216,11 @@ def multiply_along_axis(pro: Producer, """ arr = np.array(arr) - - # FIXME I need to take care of when multiplication is along producing axis - if axis == pro.axis: - if len(arr) != pro.shape[pro.axis]: - msg = ('Multiplication along the production axis requires ' - 'length of arr to match chunksize {} != {}.') - raise ValueError(msg.format(len(arr), pro.chunksize)) + if arr.ndim > 1: + raise ValueError('Dimensions of multiplier arr must be exactly 1.') # ensure the arr shape matches the producers shape along axis - elif len(arr) != pro.shape[axis]: + if len(arr) != pro.shape[axis]: msg = 'operands could not be broadcast together with shapes {} {}' raise ValueError(msg.format(pro.shape, arr.shape)) @@ -231,29 +228,115 @@ def multiply_along_axis(pro: Producer, ndims = len(pro.shape) shape = np.ones(ndims, dtype=int) shape[axis] = len(arr) - y = arr.reshape(shape) + x = arr.reshape(shape) #type: Union[npt.NDArray, Producer] + + # if multiplying along pro axis convert arr 'x' to producer + if axis == pro.axis: + x = producer(x, chunksize=pro.chunksize, axis=pro.axis) - func = partial(_multiply_gen, pro, y) + func = partial(_multiply_gen, pro, x) return producer(func, chunksize=pro.chunksize, axis=pro.axis, shape=pro.shape) -def _multiply_gen(pro, arr): +def _multiply_gen(pro, multiplier): """A generating helper function that multiplies produced arrays by an - ndarray. + ndarray or producer of ndarrays. This helper function is a generating function (not a producer) and is not - intended to be called externally. + intended to be called externally. It assumes that multipliers shape is + broadcastable to producers shape. Args: pro: A producer of ndarrays. - arr: - An ndarray of the same dims as each produced array. + multiplier: + An ndarray or a producer of ndarrays. The number of dims of this + object must match the dims of pro and have shape of 1 along all axes + except 1 axis whose length must equal the length of the producer + along this axis. Yields: - The element-wise product of each produced array with arr. + The element-wise product of each produced array with multiplier. + """ + + # non-production axis multiplication factors + factors = zip_longest(pro, multiplier, fillvalue=multiplier) + + # production axis multiplication factors + if isinstance(multiplier, Producer): + factors = zip(pro, multiplier) + + for arr, mult in factors: + yield arr * mult + + +def slice_along_axis(pro: Producer, + start: Optional[int] = None, + stop: Optional[int] = None, + step: Optional[int] = None, + axis: int = -1, +) -> Producer: + """Returns a producer producing values between start and stop in step + increments along axis. + + Args: + pro: + A producer instance to slice along axis. + start: + The start index of the slice along axis. If None, slice will start + at 0. + stop: + The stop index of the slice along axis. If None slice will extend to + last element(s) of producer along axis. + step: + The size of index steps between start and stop of slice. + axis: + The axis of the producer to be sliced. + + Examples: + >>> x = np.random.random((4,10000)) + >>> pro = producer(x, chunksize=1000, axis=-1) + >>> sliced_pro = slice_along_axis(pro, 100, 200) + >>> np.allclose(x[:,100:200], sliced_pro.to_array()) + True + + Returns: + A producer of ndarrays. """ - for x in pro: - yield x * arr + # get start, stop, step indices for the slicing axis + start, stop, step = slice(start, stop, step).indices(pro.shape[axis]) + + if axis == pro.axis: + # slicing along production axis is just masking + mask = np.zeros(pro.shape[axis], dtype=bool) + mask[start:stop:step] = True + return producer(pro, pro.chunksize, pro.axis, mask=mask) + + # slicing along non-production axis changes shape of produced arrays + new_shape = list(pro.shape) + new_shape[axis] = (stop - start) // step + func = partial(_slice_along_gen, pro, start, stop, step, axis) + return producer(func, pro.chunksize, pro.axis, shape=new_shape) + + +def _slice_along_gen(pro, start, stop, step, axis): + """A generating helper function for slicing a producer along + a non-production axis between start and stop in step increments. + + Args: + pro: + A producer instance to slice. + start: + The start index of the slice. May be None. + stop: + The stop index of the slice. May be None. + step: + The step size between start and stop to slice with. May be None. + axis: + The non-production axis along which to slice. + """ + + for arr in pro: + yield arraytools.slice_along_axis(arr, start, stop, step, axis=axis) diff --git a/src/openseize/tools/operatives.py b/src/openseize/tools/operatives.py deleted file mode 100644 index 4c40922..0000000 --- a/src/openseize/tools/operatives.py +++ /dev/null @@ -1,366 +0,0 @@ -"""A collection of tools to manipulate the size, shape or values produced by -a producer including: - - pad: - A function to pre and post pad a producer along a single axis. - expand_dims: - A function that expands a producers shape by axis insertion. - multiply_along_axis: - A function that multiplies produced values by a 1-D numpy array along - a single axis. - slice_along_axis: - A function that slices a producer along any axis. - -Note: To support concurrency all functions in this module are available at the -module level. Functions not intended to be called externally are marked as -protected with a single underscore. -""" - -from typing import Optional, Tuple, Union -from functools import partial -from itertools import zip_longest - -import numpy as np -import numpy.typing as npt -from openseize import producer -from openseize.core import arraytools -from openseize.core.producer import Producer - - -def pad(pro: Producer, - amt: Union[int, Tuple[int, int]], - axis: int, - value: Optional[float] = 0, -) -> Producer: - """Pads the edges of a producer along single axis with a constant value. - - Args: - pro: - A producer of ndarrays whose edges along axis are to be padded. - amt: - The number of pads to apply before the 0th element & after the - last element along axis. If int, amt number of values will be - prepended & appended to axis. - axis: - The axis of produced values along which to pad. - value: float - The constant value to pad the producer with. Defaults to zero. - - Examples: - >>> x = np.arange(1000).reshape(4, 250) - >>> pro = producer(x, chunksize=100, axis=-1) - >>> padded_pro = pad(pro, [3, 10], axis=-1) - >>> padded_pro.shape - (4, 263) - >>> np.allclose(np.pad(x, [(0,0), (3, 10)]), padded_pro.to_array()) - True - - Returns: - A new producer padded with value along axis. - - Notes: - This padding is less sophisticated than numpy as openseize only allows - constant pre and post padding. Future versions will likely improve this. - """ - - amts = (amt, amt) if isinstance(amt, int) else tuple(amt) - - # dispatch to generating function based on whether pad is along pro.axis - if axis == pro.axis: - genfunc = _production_axis_padder - else: - genfunc = _other_axis_padder - - # build a partial generating function and compute the return pros shape - func = partial(genfunc, pro, amts, axis, value) - new_shape = list(pro.shape) - new_shape[axis] = pro.shape[axis] + sum(amts) - - return producer(func, pro.chunksize, pro.axis, shape=new_shape) - - -def _production_axis_padder(pro, amt, axis, value): - """A generating function that pads a producer along its axis with value. - - Padding a producer along its production axis only changes the first and last - produced arrays. For argument definitions see pad. - """ - - left_shape, right_shape = list(pro.shape), list(pro.shape) - left_shape[axis] = amt[0] - right_shape[axis] = amt[1] - - # create the arrays to pad left and right along axis - left, right = [value * np.ones(s) for s in (left_shape, right_shape)] - - yield left - - for arr in pro: - yield arr - - yield right - - -def _other_axis_padder(pro, amt, axis, value): - """A generating func. that pads a producer along any non-production axis. - - Padding a producer along a non-production axis changes the shape of all - produced arrays. - """ - - for arr in pro: - yield arraytools.pad_along_axis(arr, amt, axis, constant_values=value) - - -def expand_dims(pro: Producer, axis: Union[int, Tuple] = 0) -> Producer: - """Expands a producer's shape by inserting a new axis at axis position. - - Args: - producer: - A producer of ndarrays. - axis: - The position in the expanded axes where the axis or axes are placed. - - Examples: - >>> data = np.random.random((102344,)) - >>> pro = producer(data, chunksize=100, axis=-1) - >>> print(pro.shape) - (102344,) - >>> print(pro.axis) - -1 - >>> expanded = expand_dims(pro, axis=(0, -1)) - >>> print(expanded.shape) - (1, 102344, 1) - >>> # take note the producing axis changes too! - >>> print(expanded.axis) - 1 - - Returns: - A new producer with expanded dimensions. - - Notes: - In contrast with numpy's expand_dims, this function must expand the - produced array dims and track where the producing axis ends up. Callers - should be aware that inserting new axes into a producer may change the - production axis. - """ - - # normalize the axis to insert and the producer's axis - axes = (axis,) if isinstance(axis, int) else axis - pro_axis = arraytools.normalize_axis(pro.axis, len(pro.shape)) - - # calculate out ndims, initialize new shape and normalize inserts - new_ndim = len(pro.shape) + len(axes) - new_shape = np.ones(new_ndim, dtype=int) - inserts = [arraytools.normalize_axis(ax, new_ndim) for ax in axes] - - # find indices of new_shape where we will insert producer's shape - complements = sorted(set(range(new_ndim)).difference(inserts)) - - # set the new axis and insert producer's shape into new shape - new_axis = complements[pro_axis] - - for idx, comp in enumerate(complements): - - new_shape[comp] = pro.shape[idx] - - func = partial(_expand_gen, pro, axes) - return producer(func, pro.chunksize, new_axis, tuple(new_shape)) - - -def _expand_gen(pro, axes): - """A generating function that expands the dims of each produced array - in a producer. - - Args: - pro: - A producer of ndarrays. - axes: - A tuple of axes to insert. - - Yields: - Arrays with expanded dims. - """ - - for arr in pro: - yield np.expand_dims(arr, axes) - - -def multiply_along_axis(pro: Producer, - arr: npt.NDArray, - axis: int, -) -> Producer: - """Multiplies produced arrays by a 1-D array along a single axis. - - Args: - pro: - A producer of ndarrays to be multiplied along axis. - arr: - A 1-D array whose length must match producers length along a single - axis. - axis: - The axis along which to multiply. This function supports - multiplication along any single axis including the production axis. - - Examples: - >>> x = np.arange(10000).reshape(2, 4, 1250) - >>> pro = producer(x, chunksize=100, axis=-1) - >>> arr = np.array([0, -1, 1, 0]) #1D array to multiply by - >>> multiplied = multiply_along_axis(pro, arr, axis=1) - >>> y = multiplied.to_array() - >>> np.allclose(x * arr.reshape(1, 4, 1), y) - True - - Returns: - A new producer of arrays the same shape as the input producer. - """ - - arr = np.array(arr) - if arr.ndim > 1: - raise ValueError('Dimensions of multiplier arr must be exactly 1.') - - # ensure the arr shape matches the producers shape along axis - if len(arr) != pro.shape[axis]: - msg = 'operands could not be broadcast together with shapes {} {}' - raise ValueError(msg.format(pro.shape, arr.shape)) - - # reshape the input array to be broadcastable with produced arrays - ndims = len(pro.shape) - shape = np.ones(ndims, dtype=int) - shape[axis] = len(arr) - x = arr.reshape(shape) - - # if multiplying along pro axis convert arr 'x' to producer - if axis == pro.axis: - x = producer(x, chunksize=pro.chunksize, axis=pro.axis) - - func = partial(_multiply_gen, pro, x) - return producer(func, chunksize=pro.chunksize, axis=pro.axis, - shape=pro.shape) - - -def _multiply_gen(pro, multiplier): - """A generating helper function that multiplies produced arrays by an - ndarray or producer of ndarrays. - - This helper function is a generating function (not a producer) and is not - intended to be called externally. It assumes that multipliers shape is - broadcastable to producers shape. - - Args: - pro: - A producer of ndarrays. - multiplier: - An ndarray or a producer of ndarrays. The number of dims of this - object must match the dims of pro and have shape of 1 along all axes - except 1 axis whose length must equal the length of the producer - along this axis. - - Yields: - The element-wise product of each produced array with multiplier. - """ - - # non-production axis multiplication factors - factors = zip_longest(pro, multiplier, fillvalue=multiplier) - - # production axis multiplication factors - if isinstance(multiplier, Producer): - factors = zip(pro, multiplier) - - for arr, mult in factors: - yield arr * mult - - -def slice_along_axis(pro: Producer, - start: Optional[int] = None, - stop: Optional[int] = None, - step: Optional[int] = None, - axis: int = -1, -) -> Producer: - """Returns a producer producing values between start and stop in step - increments along axis. - - Args: - pro: - A producer instance to slice along axis. - start: - The start index of the slice along axis. If None, slice will start - at 0. - stop: - The stop index of the slice along axis. If None slice will extend to - last element(s) of producer along axis. - step: - The size of index steps between start and stop of slice. - axis: - The axis of the producer to be sliced. - - Examples: - >>> x = np.random.random((4,10000)) - >>> pro = producer(x, chunksize=1000, axis=-1) - >>> sliced_pro = slice_along_axis(pro, 100, 200) - >>> np.allclose(x[:,100:200), sliced_pro.to_array()) - True - - Returns: - A producer of ndarrays. - """ - - # get start, stop, step indices for the slicing axis - start, stop, step = slice(start, stop, step).indices(pro.shape[axis]) - - if axis == pro.axis: - # slicing along production axis is just masking - mask = np.zeros(pro.shape[axis]) - mask[start:stop:step] = True - return producer(pro, pro.chunksize, pro.axis, mask=mask) - - else: - # slicing along non-production axis changes shape of produced arrays - new_shape = list(pro.shape) - new_shape[axis] = (stop - start) // step - func = partial(_slice_along_gen, pro, start, stop, step, axis) - return producer(func, pro.chunksize, pro.axis, shape=new_shape) - - -def _slice_along_gen(pro, start, stop, step, axis): - """A generating helper function for slicing a producer along - a non-production axis between start and stop in step increments. - - Args: - pro: - A producer instance to slice. - start: - The start index of the slice. May be None. - stop: - The stop index of the slice. May be None. - step: - The step size between start and stop to slice with. May be None. - axis: - The non-production axis along which to slice. - """ - - for arr in pro: - yield arraytools.slice_along_axis(arr, start, stop, step, axis=axis) - - -if __name__ == '__main__': - - x = np.random.random((4,10000)) - pro = producer(x, chunksize=1000, axis=-1) - - """ - y = 10 * np.ones(4) - mpro = multiply_along_axis(pro, y, axis=0) - - print(np.allclose(mpro.to_array(), x * y.reshape(4,1))) - - z = 13.77 * np.ones(10000) - mpro2 = multiply_along_axis(pro, z, axis=-1) - print(np.allclose(mpro2.to_array(), x * z.reshape(1, 10000))) - """ - - sliced = slice_along_axis(pro, start=100, stop=300, axis=-1) - print(np.allclose(sliced.to_array(), x[:, 100:300])) - - sliced2 = slice_along_axis(pro, start=1, step=2, axis=0) - print(np.allclose(sliced2.to_array(), x[1::2,:])) diff --git a/tests/test_operatives.py b/tests/test_protools.py similarity index 92% rename from tests/test_operatives.py rename to tests/test_protools.py index 12cd86b..3c5634c 100644 --- a/tests/test_operatives.py +++ b/tests/test_protools.py @@ -1,4 +1,4 @@ -"""A module for testing ops that manipulate the size, shape and values +"""A module for testing protools that manipulate the size, shape and values produced by a producer. Typical usage example: @@ -11,7 +11,7 @@ from pytest_lazyfixture import lazy_fixture from openseize import producer -from openseize.tools import operatives as ops +from openseize.core import protools @pytest.fixture(scope='module') @@ -69,7 +69,7 @@ def test_production_pad(arr): # pad the producer amt = (10, 752) - padded_pro = ops.pad(pro, amt=amt, axis=pro_axis) + padded_pro = protools.pad(pro, amt=amt, axis=pro_axis) # build a producer from the padded array to compare against pads = [(0,0) for _ in range(arr.ndim)] @@ -102,7 +102,7 @@ def test_nonproduction_pad(arr): amt = (190, 13) # create padded_producers - padded_pros = [ops.pad(pro, amt=amt, axis=ax) for ax in padding_axes] + padded_pros = [protools.pad(pro, amt=amt, axis=ax) for ax in padding_axes] # create ground truth padded array producers ground_truth_pros = [] @@ -137,7 +137,7 @@ def test_expand_dims(arr): pro = producer(arr, chunksize=1000, axis=axis) for insertion in range(arr.ndim): - expanded = ops.expand_dims(pro, axis=insertion) + expanded = protools.expand_dims(pro, axis=insertion) for x, y in zip(pro, expanded): @@ -162,7 +162,7 @@ def test_multiply_along_axis(arr): multiplier = 4.3 * np.ones(pro.shape[0]) # call multiply along 0th axis for each transposed arr - result = ops.multiply_along_axis(pro, multiplier, axis=0).to_array() + result = protools.multiply_along_axis(pro, multiplier, axis=0).to_array() #broadcast multiplier for multiplication along 0th axis shape = np.ones(arr.ndim, dtype=int) @@ -196,6 +196,6 @@ def test_slice_along_axis(arr): start, stop, step = 0, 2, None # slice and convert to array - result = ops.slice_along_axis(pro, start, stop, step, axis=0).to_array() + result = protools.slice_along_axis(pro, start, stop, step, axis=0).to_array() assert np.allclose(arr[start:stop:step], result)