Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ jobs:
# Prevent the include jobs from overriding other jobs
pattern: [""]
pandas_future_infer_string: ["1"]
pandas_future_python_scalars: ["0"]
include:
- name: "Downstream Compat"
env_file: actions-313-downstream_compat.yaml
Expand Down Expand Up @@ -75,6 +76,10 @@ jobs:
env_file: actions-313.yaml
pandas_future_infer_string: "0"
platform: ubuntu-24.04
- name: "PANDAS_FUTURE_PYTHON_SCALARS=1"
env_file: actions-313.yaml
pandas_future_python_scalars: "1"
platform: ubuntu-24.04
- name: "Numpy Dev"
env_file: actions-313-numpydev.yaml
pattern: "not slow and not network and not single_cpu"
Expand All @@ -92,6 +97,7 @@ jobs:
LC_ALL: ${{ matrix.lc_all || '' }}
PANDAS_CI: '1'
PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '1' }}
PANDAS_FUTURE_PYTHON_SCALARS: ${{ matrix.pandas_future_python_scalars || '0' }}
TEST_ARGS: ${{ matrix.test_args || '' }}
PYTEST_WORKERS: 'auto'
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
Expand Down
5 changes: 5 additions & 0 deletions pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ def using_string_dtype() -> bool:
return _mode_options["infer_string"]


def using_python_scalars() -> bool:
    """Return the current value of the ``future.python_scalars`` option."""
    return _global_config["future"]["python_scalars"]


def is_nan_na() -> bool:
    """Return the current value of the ``mode.nan_is_na`` option."""
    return _global_config["mode"]["nan_is_na"]
5 changes: 5 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2097,6 +2097,11 @@ def using_infer_string() -> bool:
return pd.options.future.infer_string is True


@pytest.fixture
def using_python_scalars() -> bool:
    """
    Fixture to check if the ``future.python_scalars`` option is enabled.
    """
    option_value = pd.options.future.python_scalars
    # Identity check: only the literal ``True`` counts as enabled.
    return option_value is True


_warsaws: list[Any] = ["Europe/Warsaw", "dateutil/Europe/Warsaw"]
if pytz is not None:
_warsaws.append(pytz.timezone("Europe/Warsaw"))
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from pandas._libs import lib
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op

from pandas.core.dtypes.cast import maybe_unbox_numpy_scalar
from pandas.core.dtypes.generic import ABCNDFrame

from pandas.core import roperator
Expand Down Expand Up @@ -529,4 +530,6 @@ def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwar

# By default, numpy's reductions do not skip NaNs, so we have to
# pass skipna=False
return getattr(self, method_name)(skipna=False, **kwargs)
result = getattr(self, method_name)(skipna=False, **kwargs)
result = maybe_unbox_numpy_scalar(result)
return result
19 changes: 14 additions & 5 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@

import numpy as np

from pandas._config import is_nan_na
from pandas._config import (
is_nan_na,
using_python_scalars,
)

from pandas._libs import (
algos as libalgos,
Expand All @@ -27,7 +30,10 @@
from pandas.errors import AbstractMethodError

from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.cast import maybe_downcast_to_dtype
from pandas.core.dtypes.cast import (
maybe_downcast_to_dtype,
maybe_unbox_numpy_scalar,
)
from pandas.core.dtypes.common import (
is_bool,
is_integer_dtype,
Expand Down Expand Up @@ -1518,7 +1524,10 @@ def _reduce(
if isna(result):
return self._wrap_na_result(name=name, axis=0, mask_size=(1,))
else:
result = result.reshape(1)
if using_python_scalars():
result = np.array([result])
else:
result = result.reshape(1)
mask = np.zeros(1, dtype=bool)
return self._maybe_mask_result(result, mask)

Expand Down Expand Up @@ -1742,7 +1751,7 @@ def any(

values = self._data.copy()
np.putmask(values, self._mask, self.dtype._falsey_value)
result = values.any()
result = maybe_unbox_numpy_scalar(values.any())
if skipna:
return result
else:
Expand Down Expand Up @@ -1828,7 +1837,7 @@ def all(

values = self._data.copy()
np.putmask(values, self._mask, self.dtype._truthy_value)
result = values.all(axis=axis)
result = maybe_unbox_numpy_scalar(values.all(axis=axis))

if skipna:
return result # type: ignore[return-value]
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -900,5 +900,13 @@ def register_converter_cb(key: str) -> None:
validator=is_one_of_factory([True, False]),
)

# The default is driven by the PANDAS_FUTURE_PYTHON_SCALARS environment
# variable: unset or "0" leaves the option off, any other value turns it on.
cf.register_option(
    "python_scalars",
    os.environ.get("PANDAS_FUTURE_PYTHON_SCALARS", "0") != "0",
    "Whether to return Python scalars instead of NumPy or PyArrow scalars. "
    "Currently experimental, setting to True is not recommended for end users.",
    validator=is_one_of_factory([True, False]),
)

# GH#59502
cf.deprecate_option("future.no_silent_downcasting", Pandas4Warning)
11 changes: 11 additions & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from pandas._config import (
is_nan_na,
using_python_scalars,
using_string_dtype,
)

Expand Down Expand Up @@ -1434,6 +1435,16 @@ def construct_1d_arraylike_from_scalar(
return subarr


def maybe_unbox_numpy_scalar(value):
    """
    Convert a NumPy scalar to a Python scalar if ``using_python_scalars()``.

    Parameters
    ----------
    value : object
        Any object. Only instances of ``np.generic`` are converted.

    Returns
    -------
    object
        ``value`` unchanged when ``using_python_scalars()`` is False or when
        ``value`` is not a NumPy scalar. Otherwise ``float(value)`` for
        ``np.longdouble``, ``complex(value)`` for ``np.clongdouble`` and
        ``value.item()`` for every other NumPy scalar.
    """
    if not (using_python_scalars() and isinstance(value, np.generic)):
        return value
    # ``.item()`` hands back the NumPy scalar itself for the extended-precision
    # types, so those are converted explicitly (precision may be lost).
    if isinstance(value, np.longdouble):
        return float(value)
    if isinstance(value, np.clongdouble):
        return complex(value)
    return value.item()


def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj):
# Caller is responsible for checking dtype.kind in "mM"

Expand Down
11 changes: 6 additions & 5 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
common_dtype_categorical_compat,
find_result_type,
infer_dtype_from,
maybe_unbox_numpy_scalar,
np_can_hold_element,
)
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -7532,7 +7533,7 @@ def min(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs)
# quick check
first = self[0]
if not isna(first):
return first
return maybe_unbox_numpy_scalar(first)

if not self._is_multi and self.hasnans:
# Take advantage of cache
Expand All @@ -7543,7 +7544,7 @@ def min(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs)
if not self._is_multi and not isinstance(self._values, np.ndarray):
return self._values._reduce(name="min", skipna=skipna)

return nanops.nanmin(self._values, skipna=skipna)
return maybe_unbox_numpy_scalar(nanops.nanmin(self._values, skipna=skipna))

def max(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs):
"""
Expand Down Expand Up @@ -7596,18 +7597,18 @@ def max(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs)
# quick check
last = self[-1]
if not isna(last):
return last
return maybe_unbox_numpy_scalar(last)

if not self._is_multi and self.hasnans:
# Take advantage of cache
mask = self._isnan
if not skipna or mask.all():
return self._na_value
return maybe_unbox_numpy_scalar(self._na_value)

if not self._is_multi and not isinstance(self._values, np.ndarray):
return self._values._reduce(name="max", skipna=skipna)

return nanops.nanmax(self._values, skipna=skipna)
return maybe_unbox_numpy_scalar(nanops.nanmax(self._values, skipna=skipna))

# --------------------------------------------------------------------

Expand Down
7 changes: 6 additions & 1 deletion pandas/core/interchange/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import numpy as np

from pandas._config import using_python_scalars

from pandas._libs.lib import infer_dtype
from pandas._libs.tslibs import iNaT
from pandas.errors import NoBufferPresent
Expand Down Expand Up @@ -232,7 +234,10 @@ def null_count(self) -> int:
"""
Number of null elements. Should always be known.
"""
return self._col.isna().sum().item()
result = self._col.isna().sum()
if not using_python_scalars():
result = result.item()
return result

@property
def metadata(self) -> dict[str, pd.Index]:
Expand Down
10 changes: 7 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
find_common_type,
infer_dtype_from,
maybe_box_native,
maybe_unbox_numpy_scalar,
)
from pandas.core.dtypes.common import (
is_dict_like,
Expand Down Expand Up @@ -2014,7 +2015,7 @@ def count(self) -> int:
>>> s.count()
2
"""
return notna(self._values).sum().astype("int64")
return maybe_unbox_numpy_scalar(notna(self._values).sum().astype("int64"))

def mode(self, dropna: bool = True) -> Series:
"""
Expand Down Expand Up @@ -7356,7 +7357,7 @@ def _reduce(

if isinstance(delegate, ExtensionArray):
# dispatch to ExtensionArray interface
return delegate._reduce(name, skipna=skipna, **kwds)
result = delegate._reduce(name, skipna=skipna, **kwds)

else:
# dispatch to numpy arrays
Expand All @@ -7370,7 +7371,10 @@ def _reduce(
f"Series.{name} does not allow {kwd_name}={numeric_only} "
"with non-numeric dtypes."
)
return op(delegate, skipna=skipna, **kwds)
result = op(delegate, skipna=skipna, **kwds)

result = maybe_unbox_numpy_scalar(result)
return result

@Appender(make_doc("any", ndim=1))
# error: Signature of "any" incompatible with supertype "NDFrame"
Expand Down
46 changes: 28 additions & 18 deletions pandas/tests/arrays/boolean/test_reduction.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,15 @@ def data():
([False, False], False, False, False, False),
],
)
def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
def test_any_all(
values, exp_any, exp_all, exp_any_noskip, exp_all_noskip, using_python_scalars
):
# the methods return numpy scalars
exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any)
exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all)
exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip)
exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip)
if not using_python_scalars:
exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any)
exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all)
exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip)
exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip)

for con in [pd.array, pd.Series]:
a = con(values, dtype="boolean")
Expand All @@ -39,23 +42,30 @@ def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
assert a.any(skipna=False) is exp_any_noskip
assert a.all(skipna=False) is exp_all_noskip

assert np.any(a.any()) is exp_any
assert np.all(a.all()) is exp_all


def test_reductions_return_types(dropna, data, all_numeric_reductions):
def test_reductions_return_types(
dropna, data, all_numeric_reductions, using_python_scalars
):
op = all_numeric_reductions
s = pd.Series(data)
if dropna:
s = s.dropna()

if op in ("sum", "prod"):
assert isinstance(getattr(s, op)(), np.int_)
elif op == "count":
# Oddly on the 32 bit build (but not Windows), this is intc (!= intp)
assert isinstance(getattr(s, op)(), np.integer)
elif op in ("min", "max"):
assert isinstance(getattr(s, op)(), np.bool_)
if using_python_scalars:
expected = {
"sum": int,
"prod": int,
"count": int,
"min": bool,
"max": bool,
}.get(op, float)
else:
# "mean", "std", "var", "median", "kurt", "skew"
assert isinstance(getattr(s, op)(), np.float64)
expected = {
"sum": np.int_,
"prod": np.int_,
"count": np.integer,
"min": np.bool_,
"max": np.bool_,
}.get(op, np.float64)
result = getattr(s, op)()
assert isinstance(result, expected), f"{type(result)} vs {expected}"
7 changes: 5 additions & 2 deletions pandas/tests/arrays/floating/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def test_floating_array_numpy_sum(values, expected):


@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"])
def test_preserve_dtypes(op):
def test_preserve_dtypes(op, using_python_scalars):
df = pd.DataFrame(
{
"A": ["a", "b", "b"],
Expand All @@ -182,7 +182,10 @@ def test_preserve_dtypes(op):

# op
result = getattr(df.C, op)()
assert isinstance(result, np.float64)
if using_python_scalars:
assert isinstance(result, float)
else:
assert isinstance(result, np.float64)

# groupby
result = getattr(df.groupby("A"), op)()
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/integer/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def test_dtypes(dtype):


@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"])
def test_preserve_dtypes(op):
def test_preserve_dtypes(op, using_python_scalars):
# for ops that enable (mean would actually work here
# but generally it is a float return value)
df = pd.DataFrame(
Expand All @@ -35,7 +35,7 @@ def test_preserve_dtypes(op):

# op
result = getattr(df.C, op)()
if op in {"sum", "prod", "min", "max"}:
if op in {"sum", "prod", "min", "max"} and not using_python_scalars:
assert isinstance(result, np.int64)
else:
assert isinstance(result, int)
Expand Down
Loading
Loading