diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 6cf9381a70d5b..46d6c7b5400a8 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -31,6 +31,7 @@ jobs: # Prevent the include jobs from overriding other jobs pattern: [""] pandas_future_infer_string: ["1"] + pandas_future_python_scalars: ["0"] include: - name: "Downstream Compat" env_file: actions-313-downstream_compat.yaml @@ -75,6 +76,10 @@ jobs: env_file: actions-313.yaml pandas_future_infer_string: "0" platform: ubuntu-24.04 + - name: "PANDAS_FUTURE_PYTHON_SCALARS=1" + env_file: actions-313.yaml + pandas_future_python_scalars: "1" + platform: ubuntu-24.04 - name: "Numpy Dev" env_file: actions-313-numpydev.yaml pattern: "not slow and not network and not single_cpu" @@ -92,6 +97,7 @@ jobs: LC_ALL: ${{ matrix.lc_all || '' }} PANDAS_CI: '1' PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '1' }} + PANDAS_FUTURE_PYTHON_SCALARS: ${{ matrix.pandas_future_python_scalars || '0' }} TEST_ARGS: ${{ matrix.test_args || '' }} PYTEST_WORKERS: 'auto' PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index ee709eff2eeae..dc380e3777f4e 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -35,6 +35,11 @@ def using_string_dtype() -> bool: return _mode_options["infer_string"] +def using_python_scalars() -> bool: + _mode_options = _global_config["future"] + return _mode_options["python_scalars"] + + def is_nan_na() -> bool: _mode_options = _global_config["mode"] return _mode_options["nan_is_na"] diff --git a/pandas/conftest.py b/pandas/conftest.py index 7fe4ec7a5ee4f..ba4d44dbb1b8a 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -2097,6 +2097,11 @@ def using_infer_string() -> bool: return pd.options.future.infer_string is True +@pytest.fixture +def using_python_scalars() -> bool: + return pd.options.future.python_scalars is True + + _warsaws: list[Any] = ["Europe/Warsaw", "dateutil/Europe/Warsaw"] if pytz is not None: _warsaws.append(pytz.timezone("Europe/Warsaw")) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index eeef8016db07f..1e90cf1949a7a 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -15,6 +15,7 @@ from pandas._libs import lib from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op +from pandas.core.dtypes.cast import maybe_unbox_numpy_scalar from pandas.core.dtypes.generic import ABCNDFrame from pandas.core import roperator @@ -529,4 +530,6 @@ def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwar # By default, numpy's reductions do not skip NaNs, so we have to # pass skipna=False - return getattr(self, method_name)(skipna=False, **kwargs) + result = getattr(self, method_name)(skipna=False, **kwargs) + result = maybe_unbox_numpy_scalar(result) + return result diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 6085b577f4392..8c8fbef11e82a 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -12,7 +12,10 @@ import numpy as np -from pandas._config import is_nan_na +from pandas._config import ( + is_nan_na, + using_python_scalars, +) from pandas._libs import ( algos as libalgos, @@ -27,7 +30,10 @@ from pandas.errors import AbstractMethodError from pandas.core.dtypes.base import ExtensionDtype -from pandas.core.dtypes.cast import maybe_downcast_to_dtype +from pandas.core.dtypes.cast import ( + maybe_downcast_to_dtype, + maybe_unbox_numpy_scalar, +) from pandas.core.dtypes.common import ( is_bool, is_integer_dtype, @@ -1518,7 +1524,10 @@ def _reduce( if isna(result): return self._wrap_na_result(name=name, axis=0, mask_size=(1,)) else: - result = result.reshape(1) + if using_python_scalars(): + result = np.array([result]) + else: + result = result.reshape(1) mask = np.zeros(1, dtype=bool) return self._maybe_mask_result(result, mask) @@ -1742,7 +1751,7 @@ def any( values = self._data.copy() np.putmask(values, self._mask, self.dtype._falsey_value) - result = values.any() + result = maybe_unbox_numpy_scalar(values.any()) if skipna: return result else: @@ -1828,7 +1837,7 @@ def all( values = self._data.copy() np.putmask(values, self._mask, self.dtype._truthy_value) - result = values.all(axis=axis) + result = maybe_unbox_numpy_scalar(values.all(axis=axis)) if skipna: return result # type: ignore[return-value] diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 83015f4007793..5a1f24e43ca10 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -900,5 +900,13 @@ def register_converter_cb(key: str) -> None: validator=is_one_of_factory([True, False]), ) + cf.register_option( + "python_scalars", + False if os.environ.get("PANDAS_FUTURE_PYTHON_SCALARS", "0") == "0" else True, + "Whether to return Python scalars instead of NumPy or PyArrow scalars. " + "Currently experimental, setting to True is not recommended for end users.", + validator=is_one_of_factory([True, False]), + ) + # GH#59502 cf.deprecate_option("future.no_silent_downcasting", Pandas4Warning) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3b615c70ebea2..d86ae281b66a9 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -20,6 +20,7 @@ from pandas._config import ( is_nan_na, + using_python_scalars, using_string_dtype, ) @@ -1434,6 +1435,16 @@ def construct_1d_arraylike_from_scalar( return subarr +def maybe_unbox_numpy_scalar(value): + result = value + if using_python_scalars() and isinstance(value, np.generic): + if isinstance(result, np.longdouble): + result = float(result) + else: + result = value.item() + return result + + def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj): # Caller is responsible for checking dtype.kind in "mM" diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4186a13926c6f..ccffd03bc646e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -92,6 +92,7 @@ common_dtype_categorical_compat, find_result_type, infer_dtype_from, + maybe_unbox_numpy_scalar, np_can_hold_element, ) from pandas.core.dtypes.common import ( @@ -7532,7 +7533,7 @@ def min(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs) # quick check first = self[0] if not isna(first): - return first + return maybe_unbox_numpy_scalar(first) if not self._is_multi and self.hasnans: # Take advantage of cache @@ -7543,7 +7544,7 @@ def min(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs) if not self._is_multi and not isinstance(self._values, np.ndarray): return self._values._reduce(name="min", skipna=skipna) - return nanops.nanmin(self._values, skipna=skipna) + return maybe_unbox_numpy_scalar(nanops.nanmin(self._values, skipna=skipna)) def max(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs): """ @@ -7596,18 +7597,18 @@ def max(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs) # quick check last = self[-1] if not isna(last): - return last + return maybe_unbox_numpy_scalar(last) if not self._is_multi and self.hasnans: # Take advantage of cache mask = self._isnan if not skipna or mask.all(): - return self._na_value + return maybe_unbox_numpy_scalar(self._na_value) if not self._is_multi and not isinstance(self._values, np.ndarray): return self._values._reduce(name="max", skipna=skipna) - return nanops.nanmax(self._values, skipna=skipna) + return maybe_unbox_numpy_scalar(nanops.nanmax(self._values, skipna=skipna)) # -------------------------------------------------------------------- diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py index 1fbffe09278fc..00a8e742c6fa9 100644 --- a/pandas/core/interchange/column.py +++ b/pandas/core/interchange/column.py @@ -7,6 +7,8 @@ import numpy as np +from pandas._config import using_python_scalars + from pandas._libs.lib import infer_dtype from pandas._libs.tslibs import iNaT from pandas.errors import NoBufferPresent @@ -232,7 +234,10 @@ def null_count(self) -> int: """ Number of null elements. Should always be known. """ - return self._col.isna().sum().item() + result = self._col.isna().sum() + if not using_python_scalars(): + result = result.item() + return result @property def metadata(self) -> dict[str, pd.Index]: diff --git a/pandas/core/series.py b/pandas/core/series.py index 1bdbbd6c41f34..e62943b3fb312 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -73,6 +73,7 @@ find_common_type, infer_dtype_from, maybe_box_native, + maybe_unbox_numpy_scalar, ) from pandas.core.dtypes.common import ( is_dict_like, @@ -2014,7 +2015,7 @@ def count(self) -> int: >>> s.count() 2 """ - return notna(self._values).sum().astype("int64") + return maybe_unbox_numpy_scalar(notna(self._values).sum().astype("int64")) def mode(self, dropna: bool = True) -> Series: """ @@ -7356,7 +7357,7 @@ def _reduce( if isinstance(delegate, ExtensionArray): # dispatch to ExtensionArray interface - return delegate._reduce(name, skipna=skipna, **kwds) + result = delegate._reduce(name, skipna=skipna, **kwds) else: # dispatch to numpy arrays @@ -7370,7 +7371,10 @@ def _reduce( f"Series.{name} does not allow {kwd_name}={numeric_only} " "with non-numeric dtypes." ) - return op(delegate, skipna=skipna, **kwds) + result = op(delegate, skipna=skipna, **kwds) + + result = maybe_unbox_numpy_scalar(result) + return result @Appender(make_doc("any", ndim=1)) # error: Signature of "any" incompatible with supertype "NDFrame" diff --git a/pandas/tests/arrays/boolean/test_reduction.py b/pandas/tests/arrays/boolean/test_reduction.py index 696ae1df4c9fd..6770bf4e5b446 100644 --- a/pandas/tests/arrays/boolean/test_reduction.py +++ b/pandas/tests/arrays/boolean/test_reduction.py @@ -25,12 +25,15 @@ def data(): ([False, False], False, False, False, False), ], ) -def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip): +def test_any_all( + values, exp_any, exp_all, exp_any_noskip, exp_all_noskip, using_python_scalars +): # the methods return numpy scalars - exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any) - exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all) - exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip) - exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip) + if not using_python_scalars: + exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any) + exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all) + exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip) + exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip) for con in [pd.array, pd.Series]: a = con(values, dtype="boolean") @@ -39,23 +42,30 @@ def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip): assert a.any(skipna=False) is exp_any_noskip assert a.all(skipna=False) is exp_all_noskip - assert np.any(a.any()) is exp_any - assert np.all(a.all()) is exp_all - -def test_reductions_return_types(dropna, data, all_numeric_reductions): +def test_reductions_return_types( + dropna, data, all_numeric_reductions, using_python_scalars +): op = all_numeric_reductions s = pd.Series(data) if dropna: s = s.dropna() - if op in ("sum", "prod"): - assert isinstance(getattr(s, op)(), np.int_) - elif op == "count": - # Oddly on the 32 bit build (but not Windows), this is intc (!= intp) - assert isinstance(getattr(s, op)(), np.integer) - elif op in ("min", "max"): - assert isinstance(getattr(s, op)(), np.bool_) + if using_python_scalars: + expected = { + "sum": int, + "prod": int, + "count": int, + "min": bool, + "max": bool, + }.get(op, float) else: - # "mean", "std", "var", "median", "kurt", "skew" - assert isinstance(getattr(s, op)(), np.float64) + expected = { + "sum": np.int_, + "prod": np.int_, + "count": np.integer, + "min": np.bool_, + "max": np.bool_, + }.get(op, np.float64) + result = getattr(s, op)() + assert isinstance(result, expected), f"{type(result)} vs {expected}" diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py index e03e8f30197b9..9d4dc81847188 100644 --- a/pandas/tests/arrays/floating/test_function.py +++ b/pandas/tests/arrays/floating/test_function.py @@ -171,7 +171,7 @@ def test_floating_array_numpy_sum(values, expected): @pytest.mark.parametrize("op", ["sum", "min", "max", "prod"]) -def test_preserve_dtypes(op): +def test_preserve_dtypes(op, using_python_scalars): df = pd.DataFrame( { "A": ["a", "b", "b"], @@ -182,7 +182,10 @@ def test_preserve_dtypes(op): # op result = getattr(df.C, op)() - assert isinstance(result, np.float64) + if using_python_scalars: + assert isinstance(result, float) + else: + assert isinstance(result, np.float64) # groupby result = getattr(df.groupby("A"), op)() diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 1b4f070d47e4e..8b832b8dd151e 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -22,7 +22,7 @@ def test_dtypes(dtype): @pytest.mark.parametrize("op", ["sum", "min", "max", "prod"]) -def test_preserve_dtypes(op): +def test_preserve_dtypes(op, using_python_scalars): # for ops that enable (mean would actually work here # but generally it is a float return value) df = pd.DataFrame( @@ -35,7 +35,7 @@ def test_preserve_dtypes(op): # op result = getattr(df.C, op)() - if op in {"sum", "prod", "min", "max"}: + if op in {"sum", "prod", "min", "max"} and not using_python_scalars: assert isinstance(result, np.int64) else: assert isinstance(result, int) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 7c4ce4c67f13d..7ee84a3768043 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1298,7 +1298,7 @@ def wrapper(x): assert r0.all() assert r1.all() - def test_any_all_extra(self): + def test_any_all_extra(self, using_python_scalars): df = DataFrame( { "A": [True, False, False], @@ -1322,13 +1322,19 @@ def test_any_all_extra(self): tm.assert_series_equal(result, expected) # Axis is None - result = df.all(axis=None).item() + result = df.all(axis=None) + if not using_python_scalars: + result = result.item() assert result is False - result = df.any(axis=None).item() + result = df.any(axis=None) + if not using_python_scalars: + result = result.item() assert result is True - result = df[["C"]].all(axis=None).item() + result = df[["C"]].all(axis=None) + if not using_python_scalars: + result = result.item() assert result is True @pytest.mark.parametrize("axis", [0, 1]) @@ -1436,7 +1442,7 @@ def test_any_all_bool_only(self): ), ], ) - def test_any_all_np_func(self, func, data, expected): + def test_any_all_np_func(self, func, data, expected, using_python_scalars): # GH 19976 data = DataFrame(data) @@ -1463,20 +1469,30 @@ def test_any_all_np_func(self, func, data, expected): elif data.dtypes.apply(lambda x: x != "category").any(): result = func(data) - assert isinstance(result, np.bool_) - assert result.item() is expected + if using_python_scalars: + assert result is expected + else: + assert isinstance(result, np.bool_) + assert result.item() is expected # method version result = getattr(DataFrame(data), func.__name__)(axis=None) - assert isinstance(result, np.bool_) - assert result.item() is expected + if using_python_scalars: + assert result is expected + else: + assert isinstance(result, np.bool_) + assert result.item() is expected - def test_any_all_object(self): + def test_any_all_object(self, using_python_scalars): # GH 19976 - result = np.all(DataFrame(columns=["a", "b"])).item() + result = np.all(DataFrame(columns=["a", "b"])) + if not using_python_scalars: + result = result.item() assert result is True - result = np.any(DataFrame(columns=["a", "b"])).item() + result = np.any(DataFrame(columns=["a", "b"])) + if not using_python_scalars: + result = result.item() assert result is False def test_any_all_object_bool_only(self): diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 529f9564ff83e..0abf5f90abcc2 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1511,7 +1511,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun @pytest.mark.parametrize("observed", [False, None]) def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( - reduction_func, observed + reduction_func, observed, using_python_scalars ): # GH 23865 # GH 27075 @@ -1553,7 +1553,9 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( expected = _results_for_groupbys_with_missing_categories[reduction_func] - if expected is np.nan: + if using_python_scalars and reduction_func == "size": + assert (res.loc[unobserved_cats] == expected).all() is True + elif expected is np.nan: assert res.loc[unobserved_cats].isnull().all().all() else: assert (res.loc[unobserved_cats] == expected).all().all() diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index 86d0ca1280596..8004e97698b67 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -151,7 +151,7 @@ def test_numpy_ufuncs_other(index, func): @pytest.mark.parametrize("func", [np.maximum, np.minimum]) -def test_numpy_ufuncs_reductions(index, func, request): +def test_numpy_ufuncs_reductions(index, func): # TODO: overlap with tests.series.test_ufunc.test_reductions if len(index) == 0: pytest.skip("Test doesn't make sense for empty index.") diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index db27572b9da26..6cfeade5a255b 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1268,11 +1268,15 @@ def test_sum_uint64(self): expected = np.uint64(10000000000000000000) tm.assert_almost_equal(result, expected) - def test_signedness_preserved_after_sum(self): + def test_signedness_preserved_after_sum(self, using_python_scalars): # GH 37491 ser = Series([1, 2, 3, 4]) - assert ser.astype("uint8").sum().dtype == "uint64" + result = ser.astype("uint8").sum() + if using_python_scalars: + assert isinstance(result, int) + else: + assert result.dtype == "uint64" class TestDatetime64SeriesReductions: diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index 4af1ca1d4800a..6aeaae51b107b 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -228,7 +228,7 @@ def test_sem(self): result = s.sem(ddof=1) assert pd.isna(result) - def test_skew(self): + def test_skew(self, using_python_scalars): sp_stats = pytest.importorskip("scipy.stats") string_series = Series(range(20), dtype=np.float64, name="series") @@ -247,7 +247,10 @@ def test_skew(self): assert np.isnan(df.skew()).all() else: assert 0 == s.skew() - assert isinstance(s.skew(), np.float64) # GH53482 + if using_python_scalars: + assert isinstance(s.skew(), float) + else: + assert isinstance(s.skew(), np.float64) # GH53482 assert (df.skew() == 0).all() def test_kurt(self): @@ -258,7 +261,7 @@ def test_kurt(self): alt = lambda x: sp_stats.kurtosis(x, bias=False) self._check_stat_op("kurt", alt, string_series) - def test_kurt_corner(self): + def test_kurt_corner(self, using_python_scalars): # test corner cases, kurt() returns NaN unless there's at least 4 # values min_N = 4 @@ -270,5 +273,8 @@ def test_kurt_corner(self): assert np.isnan(df.kurt()).all() else: assert 0 == s.kurt() - assert isinstance(s.kurt(), np.float64) # GH53482 + if using_python_scalars: + assert isinstance(s.kurt(), float) + else: + assert isinstance(s.kurt(), np.float64) # GH53482 assert (df.kurt() == 0).all() diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 5faacbb5559a9..7615510268a7a 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -330,7 +330,7 @@ def test_add(self, values_for_np_reduce, box_with_array): with pytest.raises(TypeError, match=msg): np.add.reduce(obj) - def test_max(self, values_for_np_reduce, box_with_array): + def test_max(self, values_for_np_reduce, box_with_array, using_python_scalars): box = box_with_array values = values_for_np_reduce @@ -349,12 +349,14 @@ def test_max(self, values_for_np_reduce, box_with_array): tm.assert_series_equal(result, expected) else: expected = values[1] + if using_python_scalars and values.dtype.kind in ["i", "f"]: + expected = expected.item() assert result == expected if same_type: # check we have e.g. Timestamp instead of dt64 assert type(result) == type(expected) - def test_min(self, values_for_np_reduce, box_with_array): + def test_min(self, values_for_np_reduce, box_with_array, using_python_scalars): box = box_with_array values = values_for_np_reduce @@ -372,6 +374,8 @@ def test_min(self, values_for_np_reduce, box_with_array): tm.assert_series_equal(result, expected) else: expected = values[0] + if using_python_scalars and values.dtype.kind in ["i", "f"]: + expected = expected.item() assert result == expected if same_type: # check we have e.g. Timestamp instead of dt64 diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 6788f2056bb9a..1b84a3e578ead 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1274,17 +1274,20 @@ def test_check_bottleneck_disallow(any_real_numpy_dtype, func): @pytest.mark.parametrize("val", [2**55, -(2**55), 20150515061816532]) -def test_nanmean_overflow(disable_bottleneck, val): +def test_nanmean_overflow(disable_bottleneck, val, using_python_scalars): # GH 10155 # In the previous implementation mean can overflow for int dtypes, it # is now consistent with numpy ser = Series(val, index=range(500), dtype=np.int64) result = ser.mean() - np_result = ser.values.mean() assert result == val - assert result == np_result - assert result.dtype == np.float64 + if using_python_scalars: + assert isinstance(result, float) + else: + np_result = ser.values.mean() + assert result == np_result + assert result.dtype == np.float64 @pytest.mark.parametrize( @@ -1299,13 +1302,18 @@ def test_nanmean_overflow(disable_bottleneck, val): ], ) @pytest.mark.parametrize("method", ["mean", "std", "var", "skew", "kurt", "min", "max"]) -def test_returned_dtype(disable_bottleneck, dtype, method): +def test_returned_dtype(disable_bottleneck, dtype, method, using_python_scalars): if dtype is None: pytest.skip("np.float128 not available") ser = Series(range(10), dtype=dtype) result = getattr(ser, method)() - if is_integer_dtype(dtype) and method not in ["min", "max"]: + if using_python_scalars: + if is_integer_dtype(dtype) and method in ["min", "max"]: + assert isinstance(result, int) + else: + assert isinstance(result, float) + elif is_integer_dtype(dtype) and method not in ["min", "max"]: assert result.dtype == np.float64 else: assert result.dtype == dtype diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index 49dee50954f4f..b24e1f822e998 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -133,7 +133,10 @@ def test_moments_consistency_var(all_data, adjust, ignore_na, min_periods, bias) var_x = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na ).var(bias=bias) - assert not (var_x < 0).any().any() + if isinstance(all_data, Series): + assert not (var_x < 0).any() + else: + assert not (var_x < 0).any().any() if bias: # check that biased var(x) == mean(x^2) - mean(x)^2 @@ -156,7 +159,10 @@ def test_moments_consistency_var_constant( ).var(bias=bias) # check that variance of constant series is identically 0 - assert not (var_x > 0).any().any() + if isinstance(consistent_data, Series): + assert not (var_x > 0).any() + else: + assert not (var_x > 0).any().any() expected = consistent_data * np.nan expected[count_x >= max(min_periods, 1)] = 0.0 if not bias: @@ -170,12 +176,18 @@ def test_ewm_consistency_std(all_data, adjust, ignore_na, min_periods, bias): var_x = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na ).var(bias=bias) - assert not (var_x < 0).any().any() + if isinstance(all_data, Series): + assert not (var_x < 0).any() + else: + assert not (var_x < 0).any().any() std_x = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na ).std(bias=bias) - assert not (std_x < 0).any().any() + if isinstance(all_data, Series): + assert not (std_x < 0).any() + else: + assert not (std_x < 0).any().any() # check that var(x) == std(x)^2 tm.assert_equal(var_x, std_x * std_x) @@ -183,7 +195,10 @@ def test_ewm_consistency_std(all_data, adjust, ignore_na, min_periods, bias): cov_x_x = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na ).cov(all_data, bias=bias) - assert not (cov_x_x < 0).any().any() + if isinstance(all_data, Series): + assert not (cov_x_x < 0).any() + else: + assert not (cov_x_x < 0).any().any() # check that var(x) == cov(x, x) tm.assert_equal(var_x, cov_x_x) diff --git a/pandas/tests/window/moments/test_moments_consistency_expanding.py b/pandas/tests/window/moments/test_moments_consistency_expanding.py index 7d2fa1ad5d211..894220d831300 100644 --- a/pandas/tests/window/moments/test_moments_consistency_expanding.py +++ b/pandas/tests/window/moments/test_moments_consistency_expanding.py @@ -6,11 +6,17 @@ def no_nans(x): - return x.notna().all().all() + if isinstance(x, Series): + return x.notna().all() + else: + return x.notna().all().all() def all_na(x): - return x.isnull().all().all() + if isinstance(x, Series): + return x.isnull().all() + else: + return x.isnull().all().all() @pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum]) @@ -32,7 +38,10 @@ def test_expanding_apply_consistency_sum_nans(request, all_data, min_periods, f) @pytest.mark.parametrize("ddof", [0, 1]) def test_moments_consistency_var(all_data, min_periods, ddof): var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof) - assert not (var_x < 0).any().any() + if isinstance(all_data, Series): + assert not (var_x < 0).any() + else: + assert not (var_x < 0).any().any() if ddof == 0: # check that biased var(x) == mean(x^2) - mean(x)^2 @@ -47,7 +56,10 @@ def test_moments_consistency_var_constant(consistent_data, min_periods, ddof): var_x = consistent_data.expanding(min_periods=min_periods).var(ddof=ddof) # check that variance of constant series is identically 0 - assert not (var_x > 0).any().any() + if isinstance(consistent_data, Series): + assert not (var_x > 0).any() + else: + assert not (var_x > 0).any().any() expected = consistent_data * np.nan expected[count_x >= max(min_periods, 1)] = 0.0 if ddof == 1: @@ -58,16 +70,25 @@ def test_moments_consistency_var_constant(consistent_data, min_periods, ddof): @pytest.mark.parametrize("ddof", [0, 1]) def test_expanding_consistency_var_std_cov(all_data, min_periods, ddof): var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof) - assert not (var_x < 0).any().any() + if isinstance(all_data, Series): + assert not (var_x < 0).any() + else: + assert not (var_x < 0).any().any() std_x = all_data.expanding(min_periods=min_periods).std(ddof=ddof) - assert not (std_x < 0).any().any() + if isinstance(all_data, Series): + assert not (std_x < 0).any() + else: + assert not (std_x < 0).any().any() # check that var(x) == std(x)^2 tm.assert_equal(var_x, std_x * std_x) cov_x_x = all_data.expanding(min_periods=min_periods).cov(all_data, ddof=ddof) - assert not (cov_x_x < 0).any().any() + if isinstance(all_data, Series): + assert not (cov_x_x < 0).any() + else: + assert not (cov_x_x < 0).any().any() # check that var(x) == cov(x, x) tm.assert_equal(var_x, cov_x_x) diff --git a/pandas/tests/window/moments/test_moments_consistency_rolling.py b/pandas/tests/window/moments/test_moments_consistency_rolling.py index be22338c00cb2..80d02eb51199d 100644 --- a/pandas/tests/window/moments/test_moments_consistency_rolling.py +++ b/pandas/tests/window/moments/test_moments_consistency_rolling.py @@ -1,16 +1,24 @@ import numpy as np import pytest +from pandas._config import using_python_scalars + from pandas import Series import pandas._testing as tm def no_nans(x): - return x.notna().all().all() + if using_python_scalars() and isinstance(x, Series): + return x.notna().all() + else: + return x.notna().all().all() def all_na(x): - return x.isnull().all().all() + if using_python_scalars() and isinstance(x, Series): + return x.isnull().all() + else: + return x.isnull().all().all() @pytest.fixture(params=[(1, 0), (5, 1)]) @@ -48,7 +56,10 @@ def test_moments_consistency_var(all_data, rolling_consistency_cases, center, dd var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var( ddof=ddof ) - assert not (var_x < 0).any().any() + if isinstance(all_data, Series): + assert not (var_x < 0).any() + else: + assert not (var_x < 0).any().any() if ddof == 0: # check that biased var(x) == mean(x^2) - mean(x)^2 @@ -77,7 +88,10 @@ def test_moments_consistency_var_constant( ).var(ddof=ddof) # check that variance of constant series is identically 0 - assert not (var_x > 0).any().any() + if isinstance(consistent_data, Series): + assert not (var_x > 0).any() + else: + assert not (var_x > 0).any().any() expected = consistent_data * np.nan expected[count_x >= max(min_periods, 1)] = 0.0 if ddof == 1: @@ -94,12 +108,18 @@ def test_rolling_consistency_var_std_cov( var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var( ddof=ddof ) - assert not (var_x < 0).any().any() + if isinstance(all_data, Series): + assert not (var_x < 0).any() + else: + assert not (var_x < 0).any().any() std_x = all_data.rolling(window=window, min_periods=min_periods, center=center).std( ddof=ddof ) - assert not (std_x < 0).any().any() + if isinstance(all_data, Series): + assert not (std_x < 0).any() + else: + assert not (std_x < 0).any().any() # check that var(x) == std(x)^2 tm.assert_equal(var_x, std_x * std_x) @@ -107,7 +127,10 @@ def test_rolling_consistency_var_std_cov( cov_x_x = all_data.rolling( window=window, min_periods=min_periods, center=center ).cov(all_data, ddof=ddof) - assert not (cov_x_x < 0).any().any() + if isinstance(all_data, Series): + assert not (cov_x_x < 0).any() + else: + assert not (cov_x_x < 0).any().any() # check that var(x) == cov(x, x) tm.assert_equal(var_x, cov_x_x)