From 2748c0c5a674f740cbc216973707955bf63ba3e9 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Fri, 13 Oct 2023 20:32:21 +0100 Subject: [PATCH] fix: add `highlevel`, `behavior` arguments to composite reducers (#2754) * fix: add `highlevel`, `behavior` arguments to composite reducers * test: initial, simple test * test: check two-arg functions --------- Co-authored-by: Jim Pivarski --- src/awkward/_layout.py | 25 +++ src/awkward/operations/ak_corr.py | 38 ++++- src/awkward/operations/ak_covar.py | 38 ++++- src/awkward/operations/ak_linear_fit.py | 26 ++- src/awkward/operations/ak_mean.py | 64 ++++++-- src/awkward/operations/ak_moment.py | 30 +++- src/awkward/operations/ak_ptp.py | 29 +++- src/awkward/operations/ak_softmax.py | 22 ++- src/awkward/operations/ak_std.py | 59 +++++-- src/awkward/operations/ak_var.py | 69 ++++++-- tests/test_2754_highlevel_behavior_missing.py | 155 ++++++++++++++++++ 11 files changed, 481 insertions(+), 74 deletions(-) create mode 100644 tests/test_2754_highlevel_behavior_missing.py diff --git a/src/awkward/_layout.py b/src/awkward/_layout.py index bcf801e355..a6d04f5ca3 100644 --- a/src/awkward/_layout.py +++ b/src/awkward/_layout.py @@ -36,6 +36,31 @@ def wrap_layout(content, behavior=None, highlevel=True, like=None, allow_other=F return content +def maybe_highlevel_to_lowlevel(obj): + """ + Args: + obj: an object + + Calls #ak.to_layout and returns the result iff. the object is a high-level + Awkward object, otherwise the object is returned as-is. + + This function should be removed once scalars are properly handled by `to_layout`. + """ + import awkward.highlevel + + if isinstance( + obj, + ( + awkward.highlevel.Array, + awkward.highlevel.Record, + awkward.highlevel.ArrayBuilder, + ), + ): + return awkward.to_layout(obj) + else: + return obj + + def from_arraylib(array, regulararray, recordarray): from awkward.contents import ( ByteMaskedArray, diff --git a/src/awkward/operations/ak_corr.py b/src/awkward/operations/ak_corr.py index 685848f537..d01325b0fc 100644 --- a/src/awkward/operations/ak_corr.py +++ b/src/awkward/operations/ak_corr.py @@ -3,6 +3,7 @@ import awkward as ak from awkward._behavior import behavior_of from awkward._dispatch import high_level_function +from awkward._layout import maybe_highlevel_to_lowlevel, wrap_layout from awkward._nplikes import ufuncs from awkward._nplikes.numpylike import NumpyMetadata from awkward._regularize import regularize_axis @@ -11,7 +12,17 @@ @high_level_function() -def corr(x, y, weight=None, axis=None, *, keepdims=False, mask_identity=False): +def corr( + x, + y, + weight=None, + axis=None, + *, + keepdims=False, + mask_identity=False, + highlevel=True, + behavior=None, +): """ Args: x: One coordinate to use in the correlation (anything #ak.to_layout recognizes). @@ -33,6 +44,10 @@ def corr(x, y, weight=None, axis=None, *, keepdims=False, mask_identity=False): empty lists results in None (an option type); otherwise, the calculation is followed through with the reducers' identities, usually resulting in floating-point `nan`. + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.contents.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. Computes the correlation of `x` and `y` (many types supported, including all Awkward Arrays and Records, must be broadcastable to each other). @@ -55,12 +70,12 @@ def corr(x, y, weight=None, axis=None, *, keepdims=False, mask_identity=False): yield x, y, weight # Implementation - return _impl(x, y, weight, axis, keepdims, mask_identity) + return _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior) -def _impl(x, y, weight, axis, keepdims, mask_identity): +def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): axis = regularize_axis(axis) - behavior = behavior_of(x, y, weight) + behavior = behavior_of(x, y, weight, behavior=behavior) x = ak.highlevel.Array( ak.operations.to_layout(x, allow_record=False, allow_other=False), behavior=behavior, @@ -76,8 +91,12 @@ def _impl(x, y, weight, axis, keepdims, mask_identity): ) with np.errstate(invalid="ignore", divide="ignore"): - xmean = ak.operations.ak_mean._impl(x, weight, axis, False, mask_identity) - ymean = ak.operations.ak_mean._impl(y, weight, axis, False, mask_identity) + xmean = ak.operations.ak_mean._impl( + x, weight, axis, False, mask_identity, highlevel=True, behavior=behavior + ) + ymean = ak.operations.ak_mean._impl( + y, weight, axis, False, mask_identity, highlevel=True, behavior=behavior + ) xdiff = x - xmean ydiff = y - ymean if weight is None: @@ -130,4 +149,9 @@ def _impl(x, y, weight, axis, keepdims, mask_identity): highlevel=True, behavior=behavior, ) - return sumwxy / ufuncs.sqrt(sumwxx * sumwyy) + return wrap_layout( + maybe_highlevel_to_lowlevel(sumwxy / ufuncs.sqrt(sumwxx * sumwyy)), + behavior=behavior, + highlevel=highlevel, + allow_other=True, + ) diff --git a/src/awkward/operations/ak_covar.py b/src/awkward/operations/ak_covar.py index c552d22baf..7c8b26e678 100644 --- a/src/awkward/operations/ak_covar.py +++ b/src/awkward/operations/ak_covar.py @@ -3,6 +3,7 @@ import awkward as ak from awkward._behavior import behavior_of from awkward._dispatch import high_level_function +from awkward._layout import maybe_highlevel_to_lowlevel, wrap_layout from awkward._nplikes.numpylike import NumpyMetadata from awkward._regularize import regularize_axis @@ -10,7 +11,17 @@ @high_level_function() -def covar(x, y, weight=None, axis=None, *, keepdims=False, mask_identity=False): +def covar( + x, + y, + weight=None, + axis=None, + *, + keepdims=False, + mask_identity=False, + highlevel=True, + behavior=None, +): """ Args: x: One coordinate to use in the covariance calculation (anything #ak.to_layout recognizes). @@ -32,6 +43,10 @@ def covar(x, y, weight=None, axis=None, *, keepdims=False, mask_identity=False): empty lists results in None (an option type); otherwise, the calculation is followed through with the reducers' identities, usually resulting in floating-point `nan`. + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.contents.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. Computes the covariance of `x` and `y` (many types supported, including all Awkward Arrays and Records, must be broadcastable to each other). @@ -52,12 +67,12 @@ def covar(x, y, weight=None, axis=None, *, keepdims=False, mask_identity=False): yield x, y, weight # Implementation - return _impl(x, y, weight, axis, keepdims, mask_identity) + return _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior) -def _impl(x, y, weight, axis, keepdims, mask_identity): +def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): axis = regularize_axis(axis) - behavior = behavior_of(x, y, weight) + behavior = behavior_of(x, y, weight, behavior=behavior) x = ak.highlevel.Array( ak.operations.to_layout(x, allow_record=False, allow_other=False), behavior=behavior, @@ -73,8 +88,12 @@ def _impl(x, y, weight, axis, keepdims, mask_identity): ) with np.errstate(invalid="ignore", divide="ignore"): - xmean = ak.operations.ak_mean._impl(x, weight, axis, False, mask_identity) - ymean = ak.operations.ak_mean._impl(y, weight, axis, False, mask_identity) + xmean = ak.operations.ak_mean._impl( + x, weight, axis, False, mask_identity, highlevel=True, behavior=behavior + ) + ymean = ak.operations.ak_mean._impl( + y, weight, axis, False, mask_identity, highlevel=True, behavior=behavior + ) if weight is None: sumw = ak.operations.ak_count._impl( x, @@ -109,4 +128,9 @@ def _impl(x, y, weight, axis, keepdims, mask_identity): highlevel=True, behavior=behavior, ) - return sumwxy / sumw + return wrap_layout( + maybe_highlevel_to_lowlevel(sumwxy / sumw), + behavior=behavior, + highlevel=highlevel, + allow_other=True, + ) diff --git a/src/awkward/operations/ak_linear_fit.py b/src/awkward/operations/ak_linear_fit.py index 9bec1c2a7b..2c6a1dab2c 100644 --- a/src/awkward/operations/ak_linear_fit.py +++ b/src/awkward/operations/ak_linear_fit.py @@ -15,7 +15,17 @@ @high_level_function() -def linear_fit(x, y, weight=None, axis=None, *, keepdims=False, mask_identity=False): +def linear_fit( + x, + y, + weight=None, + axis=None, + *, + keepdims=False, + mask_identity=False, + highlevel=True, + behavior=None, +): """ Args: x: One coordinate to use in the linear fit (anything #ak.to_layout recognizes). @@ -37,6 +47,10 @@ def linear_fit(x, y, weight=None, axis=None, *, keepdims=False, mask_identity=Fa empty lists results in None (an option type); otherwise, the calculation is followed through with the reducers' identities, usually resulting in floating-point `nan`. + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.contents.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. Computes the linear fit of `y` with respect to `x` (many types supported, including all Awkward Arrays and Records, must be broadcastable to each @@ -72,12 +86,12 @@ def linear_fit(x, y, weight=None, axis=None, *, keepdims=False, mask_identity=Fa yield x, y, weight # Implementation - return _impl(x, y, weight, axis, keepdims, mask_identity) + return _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior) -def _impl(x, y, weight, axis, keepdims, mask_identity): +def _impl(x, y, weight, axis, keepdims, mask_identity, highlevel, behavior): axis = regularize_axis(axis) - behavior = behavior_of(x, y, weight) + behavior = behavior_of(x, y, weight, behavior=behavior) backend = backend_of(x, y, weight, coerce_to_common=True, default=cpu) x = ak.highlevel.Array( ak.operations.to_layout(x, allow_record=False, allow_other=False).to_backend( @@ -247,4 +261,6 @@ def _impl(x, y, weight, axis, keepdims, mask_identity): if scalar: out = out[0] - return wrap_layout(out, highlevel=True, behavior=behavior, allow_other=scalar) + return wrap_layout( + out, highlevel=highlevel, behavior=behavior, allow_other=scalar + ) diff --git a/src/awkward/operations/ak_mean.py b/src/awkward/operations/ak_mean.py index 3ccc612dcb..8982cfff76 100644 --- a/src/awkward/operations/ak_mean.py +++ b/src/awkward/operations/ak_mean.py @@ -4,7 +4,7 @@ from awkward._behavior import behavior_of from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis +from awkward._layout import maybe_highlevel_to_lowlevel, maybe_posaxis, wrap_layout from awkward._nplikes.numpylike import NumpyMetadata from awkward._regularize import regularize_axis @@ -12,7 +12,16 @@ @high_level_function() -def mean(x, weight=None, axis=None, *, keepdims=False, mask_identity=False): +def mean( + x, + weight=None, + axis=None, + *, + keepdims=False, + mask_identity=False, + highlevel=True, + behavior=None, +): """ Args: x: The data on which to compute the mean (anything #ak.to_layout recognizes). @@ -33,6 +42,10 @@ def mean(x, weight=None, axis=None, *, keepdims=False, mask_identity=False): empty lists results in None (an option type); otherwise, the calculation is followed through with the reducers' identities, usually resulting in floating-point `nan`. + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.contents.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. Computes the mean in each group of elements from `x` (many types supported, including all Awkward Arrays and Records). The grouping @@ -79,11 +92,20 @@ def mean(x, weight=None, axis=None, *, keepdims=False, mask_identity=False): yield x, weight # Implementation - return _impl(x, weight, axis, keepdims, mask_identity) + return _impl(x, weight, axis, keepdims, mask_identity, highlevel, behavior) @high_level_function() -def nanmean(x, weight=None, axis=None, *, keepdims=False, mask_identity=True): +def nanmean( + x, + weight=None, + axis=None, + *, + keepdims=False, + mask_identity=True, + highlevel=True, + behavior=None, +): """ Args: x: The data on which to compute the mean (anything #ak.to_layout recognizes). @@ -104,6 +126,10 @@ def nanmean(x, weight=None, axis=None, *, keepdims=False, mask_identity=True): empty lists results in None (an option type); otherwise, the calculation is followed through with the reducers' identities, usually resulting in floating-point `nan`. + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.contents.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. Like #ak.mean, but treating NaN ("not a number") values as missing. @@ -119,20 +145,22 @@ def nanmean(x, weight=None, axis=None, *, keepdims=False, mask_identity=True): yield x, weight if weight is not None: - weight = ak.operations.ak_nan_to_none._impl(weight, False, None) + weight = ak.operations.ak_nan_to_none._impl(weight, False, behavior) return _impl( - ak.operations.ak_nan_to_none._impl(x, False, None), + ak.operations.ak_nan_to_none._impl(x, False, behavior), weight, axis, keepdims, mask_identity, + highlevel=highlevel, + behavior=behavior, ) -def _impl(x, weight, axis, keepdims, mask_identity): +def _impl(x, weight, axis, keepdims, mask_identity, highlevel, behavior): axis = regularize_axis(axis) - behavior = behavior_of(x, weight) + behavior = behavior_of(x, weight, behavior=behavior) x = ak.highlevel.Array( ak.operations.to_layout(x, allow_record=False, allow_other=False), behavior=behavior, @@ -151,7 +179,7 @@ def _impl(x, weight, axis, keepdims, mask_identity): keepdims=True, mask_identity=True, highlevel=True, - behavior=None, + behavior=behavior, ) sumwx = ak.operations.ak_sum._impl( x, @@ -159,7 +187,7 @@ def _impl(x, weight, axis, keepdims, mask_identity): keepdims=True, mask_identity=True, highlevel=True, - behavior=None, + behavior=behavior, ) else: sumw = ak.operations.ak_sum._impl( @@ -168,7 +196,7 @@ def _impl(x, weight, axis, keepdims, mask_identity): keepdims, mask_identity, highlevel=True, - behavior=None, + behavior=behavior, ) sumwx = ak.operations.ak_sum._impl( x * weight, @@ -176,13 +204,15 @@ def _impl(x, weight, axis, keepdims, mask_identity): keepdims=True, mask_identity=True, highlevel=True, - behavior=None, + behavior=behavior, ) out = sumwx / sumw if not mask_identity: - out = ak.highlevel.Array(ak.operations.fill_none(out, np.nan, axis=-1)) + out = ak.operations.fill_none( + out, np.nan, axis=-1, behavior=behavior, highlevel=True + ) if axis is None: if not keepdims: @@ -191,8 +221,12 @@ def _impl(x, weight, axis, keepdims, mask_identity): if not keepdims: posaxis = maybe_posaxis(out.layout, axis, 1) out = out[(slice(None, None),) * posaxis + (0,)] - - return out + return wrap_layout( + maybe_highlevel_to_lowlevel(out), + behavior=behavior, + highlevel=highlevel, + allow_other=True, + ) @ak._connect.numpy.implements("mean") diff --git a/src/awkward/operations/ak_moment.py b/src/awkward/operations/ak_moment.py index f00d192f68..b8005d95be 100644 --- a/src/awkward/operations/ak_moment.py +++ b/src/awkward/operations/ak_moment.py @@ -3,6 +3,7 @@ import awkward as ak from awkward._behavior import behavior_of from awkward._dispatch import high_level_function +from awkward._layout import maybe_highlevel_to_lowlevel, wrap_layout from awkward._nplikes.numpylike import NumpyMetadata from awkward._regularize import regularize_axis @@ -10,7 +11,17 @@ @high_level_function() -def moment(x, n, weight=None, axis=None, *, keepdims=False, mask_identity=False): +def moment( + x, + n, + weight=None, + axis=None, + *, + keepdims=False, + mask_identity=False, + highlevel=True, + behavior=None, +): """ Args: x: The data on which to compute the moment (anything #ak.to_layout recognizes). @@ -33,6 +44,10 @@ def moment(x, n, weight=None, axis=None, *, keepdims=False, mask_identity=False) empty lists results in None (an option type); otherwise, the calculation is followed through with the reducers' identities, usually resulting in floating-point `nan`. + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.contents.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. Computes the `n`th moment in each group of elements from `x` (many types supported, including all Awkward Arrays and Records). The grouping @@ -56,12 +71,12 @@ def moment(x, n, weight=None, axis=None, *, keepdims=False, mask_identity=False) yield x, weight # Implementation - return _impl(x, n, weight, axis, keepdims, mask_identity) + return _impl(x, n, weight, axis, keepdims, mask_identity, highlevel, behavior) -def _impl(x, n, weight, axis, keepdims, mask_identity): +def _impl(x, n, weight, axis, keepdims, mask_identity, highlevel, behavior): axis = regularize_axis(axis) - behavior = behavior_of(x, weight) + behavior = behavior_of(x, weight, behavior=behavior) x = ak.highlevel.Array( ak.operations.to_layout(x, allow_record=False, allow_other=False), behavior=behavior, @@ -107,4 +122,9 @@ def _impl(x, n, weight, axis, keepdims, mask_identity): highlevel=True, behavior=behavior, ) - return sumwxn / sumw + return wrap_layout( + maybe_highlevel_to_lowlevel(sumwxn / sumw), + behavior=behavior, + highlevel=highlevel, + allow_other=True, + ) diff --git a/src/awkward/operations/ak_ptp.py b/src/awkward/operations/ak_ptp.py index 32fdaa97fb..4e4e72dd2a 100644 --- a/src/awkward/operations/ak_ptp.py +++ b/src/awkward/operations/ak_ptp.py @@ -4,7 +4,7 @@ from awkward._behavior import behavior_of from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis +from awkward._layout import maybe_highlevel_to_lowlevel, maybe_posaxis, wrap_layout from awkward._nplikes.numpylike import NumpyMetadata from awkward._regularize import regularize_axis @@ -12,7 +12,15 @@ @high_level_function() -def ptp(array, axis=None, *, keepdims=False, mask_identity=True): +def ptp( + array, + axis=None, + *, + keepdims=False, + mask_identity=True, + highlevel=True, + behavior=None, +): """ Args: array: Array-like data (anything #ak.to_layout recognizes). @@ -63,12 +71,12 @@ def ptp(array, axis=None, *, keepdims=False, mask_identity=True): yield (array,) # Implementation - return _impl(array, axis, keepdims, mask_identity) + return _impl(array, axis, keepdims, mask_identity, highlevel, behavior) -def _impl(array, axis, keepdims, mask_identity): +def _impl(array, axis, keepdims, mask_identity, highlevel, behavior): axis = regularize_axis(axis) - behavior = behavior_of(array) + behavior = behavior_of(array, behavior=behavior) layout = ak.operations.to_layout(array, allow_record=False, allow_other=False) with np.errstate(invalid="ignore", divide="ignore"): @@ -96,7 +104,9 @@ def _impl(array, axis, keepdims, mask_identity): assert maxi is not None and mini is not None if not mask_identity: - out = ak.highlevel.Array(ak.operations.fill_none(out, 0, axis=-1)) + out = ak.operations.fill_none( + out, 0, axis=-1, behavior=behavior, highlevel=True + ) if axis is None: if not keepdims: @@ -106,7 +116,12 @@ def _impl(array, axis, keepdims, mask_identity): posaxis = maybe_posaxis(out.layout, axis, 1) out = out[(slice(None, None),) * posaxis + (0,)] - return out + return wrap_layout( + maybe_highlevel_to_lowlevel(out), + behavior=behavior, + highlevel=highlevel, + allow_other=True, + ) @ak._connect.numpy.implements("ptp") diff --git a/src/awkward/operations/ak_softmax.py b/src/awkward/operations/ak_softmax.py index 00eed01048..22f6553665 100644 --- a/src/awkward/operations/ak_softmax.py +++ b/src/awkward/operations/ak_softmax.py @@ -3,6 +3,7 @@ import awkward as ak from awkward._behavior import behavior_of from awkward._dispatch import high_level_function +from awkward._layout import maybe_highlevel_to_lowlevel, wrap_layout from awkward._nplikes import ufuncs from awkward._nplikes.numpylike import NumpyMetadata from awkward._regularize import regularize_axis @@ -11,7 +12,9 @@ @high_level_function() -def softmax(x, axis=None, *, keepdims=False, mask_identity=False): +def softmax( + x, axis=None, *, keepdims=False, mask_identity=False, highlevel=True, behavior=None +): """ Args: x: The data on which to compute the softmax (anything #ak.to_layout recognizes). @@ -28,6 +31,10 @@ def softmax(x, axis=None, *, keepdims=False, mask_identity=False): empty lists results in None (an option type); otherwise, the calculation is followed through with the reducers' identities, usually resulting in floating-point `nan`. + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.contents.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. Computes the softmax in each group of elements from `x` (many types supported, including all Awkward Arrays and Records). The grouping @@ -48,12 +55,12 @@ def softmax(x, axis=None, *, keepdims=False, mask_identity=False): yield (x,) # Implementation - return _impl(x, axis, keepdims, mask_identity) + return _impl(x, axis, keepdims, mask_identity, highlevel, behavior) -def _impl(x, axis, keepdims, mask_identity): +def _impl(x, axis, keepdims, mask_identity, highlevel, behavior): axis = regularize_axis(axis) - behavior = behavior_of(x) + behavior = behavior_of(x, behavior=behavior) x = ak.highlevel.Array( ak.operations.to_layout(x, allow_record=False, allow_other=False), behavior=behavior, @@ -69,4 +76,9 @@ def _impl(x, axis, keepdims, mask_identity): highlevel=True, behavior=behavior, ) - return expx / denom + return wrap_layout( + maybe_highlevel_to_lowlevel(expx / denom), + behavior=behavior, + highlevel=highlevel, + allow_other=True, + ) diff --git a/src/awkward/operations/ak_std.py b/src/awkward/operations/ak_std.py index 858fed7eeb..cce2e13c35 100644 --- a/src/awkward/operations/ak_std.py +++ b/src/awkward/operations/ak_std.py @@ -4,7 +4,7 @@ from awkward._behavior import behavior_of from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis +from awkward._layout import maybe_highlevel_to_lowlevel, maybe_posaxis, wrap_layout from awkward._nplikes import ufuncs from awkward._nplikes.numpylike import NumpyMetadata from awkward._regularize import regularize_axis @@ -13,7 +13,17 @@ @high_level_function() -def std(x, weight=None, ddof=0, axis=None, *, keepdims=False, mask_identity=False): +def std( + x, + weight=None, + ddof=0, + axis=None, + *, + keepdims=False, + mask_identity=False, + highlevel=True, + behavior=None, +): """ Args: x: The data on which to compute the standard deviation (anything #ak.to_layout recognizes). @@ -37,6 +47,10 @@ def std(x, weight=None, ddof=0, axis=None, *, keepdims=False, mask_identity=Fals empty lists results in None (an option type); otherwise, the calculation is followed through with the reducers' identities, usually resulting in floating-point `nan`. + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.contents.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. Computes the standard deviation in each group of elements from `x` (many types supported, including all Awkward Arrays and Records). The @@ -61,11 +75,21 @@ def std(x, weight=None, ddof=0, axis=None, *, keepdims=False, mask_identity=Fals yield x, weight # Implementation - return _impl(x, weight, ddof, axis, keepdims, mask_identity) + return _impl(x, weight, ddof, axis, keepdims, mask_identity, highlevel, behavior) @high_level_function() -def nanstd(x, weight=None, ddof=0, axis=None, *, keepdims=False, mask_identity=True): +def nanstd( + x, + weight=None, + ddof=0, + axis=None, + *, + keepdims=False, + mask_identity=True, + highlevel=True, + behavior=None, +): """ Args: x: The data on which to compute the standard deviation (anything #ak.to_layout recognizes). @@ -89,6 +113,10 @@ def nanstd(x, weight=None, ddof=0, axis=None, *, keepdims=False, mask_identity=T empty lists results in None (an option type); otherwise, the calculation is followed through with the reducers' identities, usually resulting in floating-point `nan`. + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.contents.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. Like #ak.std, but treating NaN ("not a number") values as missing. @@ -105,21 +133,23 @@ def nanstd(x, weight=None, ddof=0, axis=None, *, keepdims=False, mask_identity=T # Implementation if weight is not None: - weight = ak.operations.ak_nan_to_none._impl(weight, False, None) + weight = ak.operations.ak_nan_to_none._impl(weight, False, behavior) return _impl( - ak.operations.ak_nan_to_none._impl(x, False, None), + ak.operations.ak_nan_to_none._impl(x, False, behavior), weight, ddof, axis, keepdims, mask_identity, + highlevel=highlevel, + behavior=behavior, ) -def _impl(x, weight, ddof, axis, keepdims, mask_identity): +def _impl(x, weight, ddof, axis, keepdims, mask_identity, highlevel, behavior): axis = regularize_axis(axis) - behavior = behavior_of(x, weight) + behavior = behavior_of(x, weight, behavior=behavior) x = ak.highlevel.Array( ak.operations.to_layout(x, allow_record=False, allow_other=False), behavior=behavior, @@ -139,11 +169,15 @@ def _impl(x, weight, ddof, axis, keepdims, mask_identity): axis, keepdims=True, mask_identity=True, + highlevel=True, + behavior=behavior, ) ) if not mask_identity: - out = ak.highlevel.Array(ak.operations.fill_none(out, np.nan, axis=-1)) + out = ak.operations.fill_none( + out, np.nan, axis=-1, behavior=behavior, highlevel=True + ) if axis is None: if not keepdims: @@ -153,7 +187,12 @@ def _impl(x, weight, ddof, axis, keepdims, mask_identity): posaxis = maybe_posaxis(out.layout, axis, 1) out = out[(slice(None, None),) * posaxis + (0,)] - return out + return wrap_layout( + maybe_highlevel_to_lowlevel(out), + behavior=behavior, + highlevel=highlevel, + allow_other=True, + ) @ak._connect.numpy.implements("std") diff --git a/src/awkward/operations/ak_var.py b/src/awkward/operations/ak_var.py index afda594080..ad117b4eb2 100644 --- a/src/awkward/operations/ak_var.py +++ b/src/awkward/operations/ak_var.py @@ -4,7 +4,7 @@ from awkward._behavior import behavior_of from awkward._connect.numpy import UNSUPPORTED from awkward._dispatch import high_level_function -from awkward._layout import maybe_posaxis +from awkward._layout import maybe_highlevel_to_lowlevel, maybe_posaxis, wrap_layout from awkward._nplikes.numpylike import NumpyMetadata from awkward._regularize import regularize_axis @@ -12,7 +12,17 @@ @high_level_function() -def var(x, weight=None, ddof=0, axis=None, *, keepdims=False, mask_identity=False): +def var( + x, + weight=None, + ddof=0, + axis=None, + *, + keepdims=False, + mask_identity=False, + highlevel=True, + behavior=None, +): """ Args: x: The data on which to compute the variance (anything #ak.to_layout recognizes). @@ -36,6 +46,10 @@ def var(x, weight=None, ddof=0, axis=None, *, keepdims=False, mask_identity=Fals empty lists results in None (an option type); otherwise, the calculation is followed through with the reducers' identities, usually resulting in floating-point `nan`. + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.contents.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. Computes the variance in each group of elements from `x` (many types supported, including all Awkward Arrays and Records). The grouping @@ -66,11 +80,21 @@ def var(x, weight=None, ddof=0, axis=None, *, keepdims=False, mask_identity=Fals yield x, weight # Implementation - return _impl(x, weight, ddof, axis, keepdims, mask_identity) + return _impl(x, weight, ddof, axis, keepdims, mask_identity, highlevel, behavior) @high_level_function() -def nanvar(x, weight=None, ddof=0, axis=None, *, keepdims=False, mask_identity=True): +def nanvar( + x, + weight=None, + ddof=0, + axis=None, + *, + keepdims=False, + mask_identity=True, + highlevel=True, + behavior=None, +): """ Args: x: The data on which to compute the variance (anything #ak.to_layout recognizes). @@ -94,6 +118,10 @@ def nanvar(x, weight=None, ddof=0, axis=None, *, keepdims=False, mask_identity=T empty lists results in None (an option type); otherwise, the calculation is followed through with the reducers' identities, usually resulting in floating-point `nan`. + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.contents.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. Like #ak.var, but treating NaN ("not a number") values as missing. @@ -119,12 +147,14 @@ def nanvar(x, weight=None, ddof=0, axis=None, *, keepdims=False, mask_identity=T axis, keepdims, mask_identity, + highlevel=highlevel, + behavior=behavior, ) -def _impl(x, weight, ddof, axis, keepdims, mask_identity): +def _impl(x, weight, ddof, axis, keepdims, mask_identity, highlevel, behavior): axis = regularize_axis(axis) - behavior = behavior_of(x, weight) + behavior = behavior_of(x, weight, behavior=behavior) x = ak.highlevel.Array( ak.operations.to_layout(x, allow_record=False, allow_other=False), behavior=behavior, @@ -137,7 +167,13 @@ def _impl(x, weight, ddof, axis, keepdims, mask_identity): with np.errstate(invalid="ignore", divide="ignore"): xmean = ak.operations.ak_mean._impl( - x, weight, axis, keepdims=True, mask_identity=True + x, + weight, + axis, + keepdims=True, + mask_identity=True, + highlevel=True, + behavior=behavior, ) if weight is None: sumw = ak.operations.ak_count._impl( @@ -146,7 +182,7 @@ def _impl(x, weight, ddof, axis, keepdims, mask_identity): keepdims=True, mask_identity=True, highlevel=True, - behavior=None, + behavior=behavior, ) sumwxx = ak.operations.ak_sum._impl( (x - xmean) ** 2, @@ -154,7 +190,7 @@ def _impl(x, weight, ddof, axis, keepdims, mask_identity): keepdims=True, mask_identity=True, highlevel=True, - behavior=None, + behavior=behavior, ) else: sumw = ak.operations.ak_sum._impl( @@ -163,7 +199,7 @@ def _impl(x, weight, ddof, axis, keepdims, mask_identity): keepdims=True, mask_identity=True, highlevel=True, - behavior=None, + behavior=behavior, ) sumwxx = ak.operations.ak_sum._impl( (x - xmean) ** 2 * weight, @@ -171,7 +207,7 @@ def _impl(x, weight, ddof, axis, keepdims, mask_identity): keepdims=True, mask_identity=True, highlevel=True, - behavior=None, + behavior=behavior, ) if ddof != 0: out = (sumwxx / sumw) * (sumw / (sumw - ddof)) @@ -179,7 +215,9 @@ def _impl(x, weight, ddof, axis, keepdims, mask_identity): out = sumwxx / sumw if not mask_identity: - out = ak.highlevel.Array(ak.operations.fill_none(out, np.nan, axis=-1)) + out = ak.operations.fill_none( + out, np.nan, axis=-1, behavior=behavior, highlevel=True + ) if axis is None: if not keepdims: @@ -189,7 +227,12 @@ def _impl(x, weight, ddof, axis, keepdims, mask_identity): posaxis = maybe_posaxis(out.layout, axis, 1) out = out[(slice(None, None),) * posaxis + (0,)] - return out + return wrap_layout( + maybe_highlevel_to_lowlevel(out), + behavior=behavior, + highlevel=highlevel, + allow_other=True, + ) @ak._connect.numpy.implements("var") diff --git a/tests/test_2754_highlevel_behavior_missing.py b/tests/test_2754_highlevel_behavior_missing.py new file mode 100644 index 0000000000..f9b349c0f8 --- /dev/null +++ b/tests/test_2754_highlevel_behavior_missing.py @@ -0,0 +1,155 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import pytest + +import awkward as ak + +behavior_1 = {"foo": "bar"} +behavior_2 = {"baz": "bargh!"} +behavior = {**behavior_1, **behavior_2} + + +@pytest.mark.parametrize( + "func", + [ + ak.softmax, + ak.any, + ak.min, + ak.argmin, + ak.sum, + ak.ptp, + ak.std, + ak.count_nonzero, + lambda *args, **kwargs: ak.moment(*args, **kwargs, n=3), + ak.argmax, + ak.all, + ak.mean, + ak.max, + ak.prod, + ak.count, + ak.var, + ], +) +def test_impl(func): + assert isinstance( + func([[1, 2, 3, 4], [5], [10]], axis=-1, highlevel=True), ak.Array + ) + assert isinstance( + func([[1, 2, 3, 4], [5], [10]], axis=-1, highlevel=False), ak.contents.Content + ) + assert ( + func( + ak.Array([[1, 2, 3, 4], [5], [10]], behavior=behavior_1), + axis=-1, + highlevel=True, + behavior=behavior_2, + ).behavior + == behavior_2 + ) + assert ( + func( + ak.Array([[1, 2, 3, 4], [5], [10]], behavior=behavior_1), + axis=-1, + highlevel=True, + ).behavior + == behavior_1 + ) + + +@pytest.mark.parametrize("func", [ak.covar, ak.corr, ak.linear_fit]) +def test_covar(func): + assert isinstance( + func( + [[1, 2, 3, 4], [5], [10]], + [[4, 4, 0, 2], [1], [10]], + axis=-1, + highlevel=True, + ), + ak.Array, + ) + assert isinstance( + func( + [[1, 2, 3, 4], [5], [10]], + [[4, 4, 0, 2], [1], [10]], + axis=-1, + highlevel=False, + ), + ak.contents.Content, + ) + assert ( + func( + ak.Array( + [[1, 2, 3, 4], [5], [10]], + behavior=behavior_1, + ), + [[4, 4, 0, 2], [1], [10]], + axis=-1, + highlevel=True, + behavior=behavior_2, + ).behavior + == behavior_2 + ) + assert ( + func( + [[1, 2, 3, 4], [5], [10]], + ak.Array( + [[4, 4, 0, 2], [1], [10]], + behavior=behavior_1, + ), + axis=-1, + highlevel=True, + behavior=behavior_2, + ).behavior + == behavior_2 + ) + assert ( + func( + ak.Array( + [[1, 2, 3, 4], [5], [10]], + behavior=behavior_1, + ), + [[4, 4, 0, 2], [1], [10]], + axis=-1, + highlevel=True, + ).behavior + == behavior_1 + ) + assert ( + func( + [[1, 2, 3, 4], [5], [10]], + ak.Array( + [[4, 4, 0, 2], [1], [10]], + behavior=behavior_1, + ), + axis=-1, + highlevel=True, + ).behavior + == behavior_1 + ) + assert ( + func( + [[1, 2, 3, 4], [5], [10]], + [[4, 4, 0, 2], [1], [10]], + weight=ak.Array( + [[1, 2, 3, 2], [1], [1]], + behavior=behavior_1, + ), + axis=-1, + highlevel=True, + behavior=behavior_2, + ).behavior + == behavior_2 + ) + assert ( + func( + [[1, 2, 3, 4], [5], [10]], + [[4, 4, 0, 2], [1], [10]], + weight=ak.Array( + [[1, 2, 3, 2], [1], [1]], + behavior=behavior_1, + ), + axis=-1, + highlevel=True, + ).behavior + == behavior_1 + )