From 64da688270f1b33acc2bfcd4c3166a50c13e4f3c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 6 Nov 2019 13:28:37 -0800 Subject: [PATCH 1/2] CLN: dtypes.cast cleanup following scalar restriction --- pandas/core/dtypes/cast.py | 79 ++++++++++++++++-------------------- pandas/core/frame.py | 2 +- pandas/core/nanops.py | 2 +- pandas/core/ops/array_ops.py | 2 +- 4 files changed, 38 insertions(+), 47 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index fad80d6bf5745..ee9cac831058d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -235,6 +235,7 @@ def trans(x): def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray, other): """ A safe version of putmask that potentially upcasts the result. + The result is replaced with the first N elements of other, where N is the number of True values in mask. If the length of other is shorter than N, other will be repeated. @@ -246,13 +247,13 @@ def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray, other): necessary. mask : boolean ndarray other : scalar - The source value + The source value. Returns ------- result : ndarray - changed : boolean - Set to true if the result array was upcasted + changed : bool + Set to true if the result array was upcasted. 
Examples -------- @@ -275,33 +276,12 @@ def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray, other): # NaN -> NaT # integer or integer array -> date-like array if is_datetimelike(result.dtype): - if is_scalar(other): - if isna(other): - other = result.dtype.type("nat") - elif is_integer(other): - other = np.array(other, dtype=result.dtype) - elif is_integer_dtype(other): + if isna(other): + other = result.dtype.type("nat") + elif is_integer(other): other = np.array(other, dtype=result.dtype) def changeit(): - - # try to directly set by expanding our array to full - # length of the boolean - try: - om = other[mask] - except (IndexError, TypeError): - # IndexError occurs in test_upcast when we have a boolean - # mask of the wrong shape - # TypeError occurs in test_upcast when `other` is a bool - pass - else: - om_at = om.astype(result.dtype) - if (om == om_at).all(): - new_result = result.values.copy() - new_result[mask] = om_at - result[:] = new_result - return result, False - # we are forced to change the dtype of the result as the input # isn't compatible r, _ = maybe_upcast(result, fill_value=other, copy=True) @@ -319,15 +299,8 @@ def changeit(): # we have a scalar or len 0 ndarray # and its nan and we are changing some values - if is_scalar(other) or (isinstance(other, np.ndarray) and other.ndim < 1): - if isna(other): - return changeit() - - # we have an ndarray and the masking has nans in it - else: - - if isna(other).any(): - return changeit() + if isna(other): + return changeit() try: np.place(result, mask, other) @@ -339,6 +312,21 @@ def changeit(): def maybe_promote(dtype, fill_value=np.nan): + """ + Find the minimal dtype that can hold both the given dtype and fill_value. + + Parameters + ---------- + dtype : np.dtype or ExtensionDtype + fill_value : scalar, default np.nan + + Returns + ------- + dtype + Upcasted from dtype argument if necessary. + fill_value + Upcasted from fill_value argument if necessary. 
+ """ if not is_scalar(fill_value) and not is_object_dtype(dtype): # with object dtype there is nothing to promote, and the user can # pass pretty much any weird fill_value they like @@ -594,11 +582,11 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False): def infer_dtype_from_array(arr, pandas_dtype: bool = False): """ - infer the dtype from a scalar or array + Infer the dtype from an array. Parameters ---------- - arr : scalar or array + arr : array pandas_dtype : bool, default False whether to infer dtype including pandas extension types. If False, array belongs to pandas extension types @@ -624,7 +612,6 @@ def infer_dtype_from_array(arr, pandas_dtype: bool = False): >>> infer_dtype_from_array([1, '1']) (numpy.object_, [1, '1']) - """ if isinstance(arr, np.ndarray): @@ -649,7 +636,8 @@ def infer_dtype_from_array(arr, pandas_dtype: bool = False): def maybe_infer_dtype_type(element): - """Try to infer an object's dtype, for use in arithmetic ops + """ + Try to infer an object's dtype, for use in arithmetic ops. Uses `element.dtype` if that's available. Objects implementing the iterator protocol are cast to a NumPy array, @@ -681,15 +669,18 @@ def maybe_infer_dtype_type(element): return tipo -def maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): - """ provide explicit type promotion and coercion +def maybe_upcast(values, fill_value=np.nan, dtype=None, copy: bool = False): + """ + Provide explicit type promotion and coercion. Parameters ---------- - values : the ndarray that we want to maybe upcast + values : ndarray or ExtensionArray + The array that we want to maybe upcast. fill_value : what we want to fill with dtype : if None, then use the dtype of the values, else coerce to this type - copy : if True always make a copy even if no upcast is required + copy : bool, default False + If True always make a copy even if no upcast is required. 
""" if not is_scalar(fill_value) and not is_object_dtype(values.dtype): # We allow arbitrary fill values for object dtype diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 40efc4c65476a..79ea8835bf17c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4574,7 +4574,7 @@ def _maybe_casted_values(index, labels=None): values = values._data if mask.any(): - values, changed = maybe_upcast_putmask(values, mask, np.nan) + values, _ = maybe_upcast_putmask(values, mask, np.nan) if issubclass(values_type, DatetimeLikeArray): values = values_type(values, dtype=values_dtype) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 7e50348962fc5..2a0c40a3aaca6 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -312,7 +312,7 @@ def _get_values( # promote if needed else: - values, changed = maybe_upcast_putmask(values, mask, fill_value) + values, _ = maybe_upcast_putmask(values, mask, fill_value) # return a platform independent precision dtype dtype_max = dtype diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 46c3b8b575af9..a6b3eabe9a6d4 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -113,7 +113,7 @@ def masked_arith_op(x, y, op): with np.errstate(all="ignore"): result[mask] = op(xrav[mask], y) - result, changed = maybe_upcast_putmask(result, ~mask, np.nan) + result, _ = maybe_upcast_putmask(result, ~mask, np.nan) result = result.reshape(x.shape) # 2D compat return result From 3a53a45d5e9a97c808170c54a7b2b6e19c00a0cf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 6 Nov 2019 13:42:22 -0800 Subject: [PATCH 2/2] un-xfail windows for kicks --- pandas/tests/dtypes/cast/test_promote.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 0939e35bd64fa..7b53a44d4f91c 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -8,7 +8,6 
@@ import pytest from pandas._libs.tslibs import NaT -from pandas.compat import is_platform_windows from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.common import ( @@ -427,11 +426,6 @@ def test_maybe_promote_datetimetz_with_datetimetz(tz_aware_fixture, tz_aware_fix dtype = DatetimeTZDtype(tz=tz_aware_fixture) fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture2) - from dateutil.tz import tzlocal - - if is_platform_windows() and tz_aware_fixture2 == tzlocal(): - pytest.xfail("Cannot process fill_value with this dtype, see GH 24310") - # create array of given dtype; casts "1" to correct dtype fill_value = pd.Series([10 ** 9], dtype=fill_dtype)[0]