diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 6cf9381a70d5b..46d6c7b5400a8 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -31,6 +31,7 @@ jobs:
         # Prevent the include jobs from overriding other jobs
         pattern: [""]
         pandas_future_infer_string: ["1"]
+        pandas_future_python_scalars: ["0"]
         include:
           - name: "Downstream Compat"
             env_file: actions-313-downstream_compat.yaml
@@ -75,6 +76,10 @@ jobs:
             env_file: actions-313.yaml
             pandas_future_infer_string: "0"
             platform: ubuntu-24.04
+          - name: "PANDAS_FUTURE_PYTHON_SCALARS=1"
+            env_file: actions-313.yaml
+            pandas_future_python_scalars: "1"
+            platform: ubuntu-24.04
           - name: "Numpy Dev"
             env_file: actions-313-numpydev.yaml
             pattern: "not slow and not network and not single_cpu"
@@ -92,6 +97,7 @@ jobs:
       LC_ALL: ${{ matrix.lc_all || '' }}
       PANDAS_CI: '1'
       PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '1' }}
+      PANDAS_FUTURE_PYTHON_SCALARS: ${{ matrix.pandas_future_python_scalars || '0' }}
       TEST_ARGS: ${{ matrix.test_args || '' }}
       PYTEST_WORKERS: 'auto'
       PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py
index ee709eff2eeae..dc380e3777f4e 100644
--- a/pandas/_config/__init__.py
+++ b/pandas/_config/__init__.py
@@ -35,6 +35,11 @@ def using_string_dtype() -> bool:
     return _mode_options["infer_string"]
 
 
+def using_python_scalars() -> bool:
+    """Return the ``python_scalars`` flag from the ``future`` config section."""
+    return _global_config["future"]["python_scalars"]
+
+
 def is_nan_na() -> bool:
     _mode_options = _global_config["mode"]
     return _mode_options["nan_is_na"]
diff --git a/pandas/conftest.py b/pandas/conftest.py
index 7fe4ec7a5ee4f..ba4d44dbb1b8a 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -2097,6 +2097,14 @@ def using_infer_string() -> bool:
     return pd.options.future.infer_string is True
 
 
+@pytest.fixture
+def using_python_scalars() -> bool:
+    """
+    Fixture to check if the ``future.python_scalars`` option is enabled.
+    """
+    return pd.options.future.python_scalars is True
+
+
 _warsaws: list[Any] = ["Europe/Warsaw", "dateutil/Europe/Warsaw"]
 if pytz is not None:
     _warsaws.append(pytz.timezone("Europe/Warsaw"))
diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py
index eeef8016db07f..1e90cf1949a7a 100644
--- a/pandas/core/arraylike.py
+++ b/pandas/core/arraylike.py
@@ -15,6 +15,7 @@
 from pandas._libs import lib
 from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
 
+from pandas.core.dtypes.cast import maybe_unbox_numpy_scalar
 from pandas.core.dtypes.generic import ABCNDFrame
 
 from pandas.core import roperator
@@ -529,4 +530,6 @@ def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwar
 
     # By default, numpy's reductions do not skip NaNs, so we have to
     #  pass skipna=False
-    return getattr(self, method_name)(skipna=False, **kwargs)
+    result = getattr(self, method_name)(skipna=False, **kwargs)
+    result = maybe_unbox_numpy_scalar(result)
+    return result
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 6085b577f4392..8c8fbef11e82a 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -12,7 +12,10 @@
 
 import numpy as np
 
-from pandas._config import is_nan_na
+from pandas._config import (
+    is_nan_na,
+    using_python_scalars,
+)
 
 from pandas._libs import (
     algos as libalgos,
@@ -27,7 +30,10 @@
 from pandas.errors import AbstractMethodError
 
 from pandas.core.dtypes.base import ExtensionDtype
-from pandas.core.dtypes.cast import maybe_downcast_to_dtype
+from pandas.core.dtypes.cast import (
+    maybe_downcast_to_dtype,
+    maybe_unbox_numpy_scalar,
+)
 from pandas.core.dtypes.common import (
     is_bool,
     is_integer_dtype,
@@ -1518,7 +1524,10 @@ def _reduce(
             if isna(result):
                 return self._wrap_na_result(name=name, axis=0, mask_size=(1,))
             else:
-                result = result.reshape(1)
+                if using_python_scalars():
+                    result = np.array([result])
+                else:
+                    result = result.reshape(1)
                 mask = np.zeros(1, dtype=bool)
                 return self._maybe_mask_result(result, mask)
 
@@ -1742,7 +1751,7 @@ def any(
 
         values = self._data.copy()
         np.putmask(values, self._mask, self.dtype._falsey_value)
-        result = values.any()
+        result = maybe_unbox_numpy_scalar(values.any())
         if skipna:
             return result
         else:
@@ -1828,7 +1837,7 @@ def all(
 
         values = self._data.copy()
         np.putmask(values, self._mask, self.dtype._truthy_value)
-        result = values.all(axis=axis)
+        result = maybe_unbox_numpy_scalar(values.all(axis=axis))
 
         if skipna:
             return result  # type: ignore[return-value]
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 83015f4007793..5a1f24e43ca10 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -900,5 +900,13 @@ def register_converter_cb(key: str) -> None:
         validator=is_one_of_factory([True, False]),
     )
 
+    cf.register_option(
+        "python_scalars",
+        os.environ.get("PANDAS_FUTURE_PYTHON_SCALARS", "0") == "1",  # opt-in only
+        "Whether to return Python scalars instead of NumPy or PyArrow scalars. "
+        "Currently experimental, setting to True is not recommended for end users.",
+        validator=is_one_of_factory([True, False]),
+    )
+
 # GH#59502
 cf.deprecate_option("future.no_silent_downcasting", Pandas4Warning)
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 3b615c70ebea2..d86ae281b66a9 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -20,6 +20,7 @@
 
 from pandas._config import (
     is_nan_na,
+    using_python_scalars,
     using_string_dtype,
 )
 
@@ -1434,6 +1435,36 @@ def construct_1d_arraylike_from_scalar(
     return subarr
 
 
+def maybe_unbox_numpy_scalar(value):
+    """
+    Convert a NumPy scalar to a Python scalar if ``future.python_scalars`` is set.
+
+    Parameters
+    ----------
+    value : object
+        Any object; only ``np.generic`` instances are ever converted.
+
+    Returns
+    -------
+    object
+        ``value`` itself when the option is off, when it is not a NumPy scalar,
+        or when it is a ``np.datetime64``/``np.timedelta64``; otherwise the
+        corresponding Python scalar (via ``.item()``, ``float`` or ``complex``).
+    """
+    if not (using_python_scalars() and isinstance(value, np.generic)):
+        return value
+    if isinstance(value, (np.datetime64, np.timedelta64)):
+        # .item() returns a bare int for nanosecond resolution, which would
+        #  silently drop the datetime-like type; leave these untouched.
+        return value
+    if isinstance(value, np.longdouble):
+        # Extended-precision scalars are cast explicitly instead of via .item()
+        return float(value)
+    if isinstance(value, np.clongdouble):
+        return complex(value)
+    return value.item()
+
+
 def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj):
     # Caller is responsible for checking dtype.kind in "mM"
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 4186a13926c6f..ccffd03bc646e 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -92,6 +92,7 @@
     common_dtype_categorical_compat,
     find_result_type,
     infer_dtype_from,
+    maybe_unbox_numpy_scalar,
     np_can_hold_element,
 )
 from pandas.core.dtypes.common import (
@@ -7532,7 +7533,7 @@ def min(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs)
             # quick check
             first = self[0]
             if not isna(first):
-                return first
+                return maybe_unbox_numpy_scalar(first)
 
         if not self._is_multi and self.hasnans:
             # Take advantage of cache
@@ -7543,7 +7544,7 @@ def min(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs)
         if not self._is_multi and not isinstance(self._values, np.ndarray):
             return self._values._reduce(name="min", skipna=skipna)
 
-        return nanops.nanmin(self._values, skipna=skipna)
+        return maybe_unbox_numpy_scalar(nanops.nanmin(self._values, skipna=skipna))
 
     def max(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs):
         """
@@ -7596,18 +7597,18 @@ def max(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs)
             # quick check
             last = self[-1]
             if not isna(last):
-                return last
+                return maybe_unbox_numpy_scalar(last)
 
         if not self._is_multi and self.hasnans:
             # Take advantage of cache
             mask = self._isnan
             if not skipna or mask.all():
-                return self._na_value
+                return maybe_unbox_numpy_scalar(self._na_value)
 
         if not self._is_multi and not isinstance(self._values, np.ndarray):
             return self._values._reduce(name="max", skipna=skipna)
 
-        return nanops.nanmax(self._values, skipna=skipna)
+        return maybe_unbox_numpy_scalar(nanops.nanmax(self._values, skipna=skipna))
 
     # --------------------------------------------------------------------
 
diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py
index 1fbffe09278fc..00a8e742c6fa9 100644
--- a/pandas/core/interchange/column.py
+++ b/pandas/core/interchange/column.py
@@ -7,6 +7,8 @@
 
 import numpy as np
 
+from pandas._config import using_python_scalars
+
 from pandas._libs.lib import infer_dtype
 from pandas._libs.tslibs import iNaT
 from pandas.errors import NoBufferPresent
@@ -232,7 +234,10 @@ def null_count(self) -> int:
         """
         Number of null elements. Should always be known.
         """
-        return self._col.isna().sum().item()
+        result = self._col.isna().sum()
+        if not using_python_scalars():
+            result = result.item()
+        return result
 
     @property
     def metadata(self) -> dict[str, pd.Index]:
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 1bdbbd6c41f34..e62943b3fb312 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -73,6 +73,7 @@
     find_common_type,
     infer_dtype_from,
     maybe_box_native,
+    maybe_unbox_numpy_scalar,
 )
 from pandas.core.dtypes.common import (
     is_dict_like,
@@ -2014,7 +2015,7 @@ def count(self) -> int:
         >>> s.count()
         2
         """
-        return notna(self._values).sum().astype("int64")
+        return maybe_unbox_numpy_scalar(notna(self._values).sum().astype("int64"))
 
     def mode(self, dropna: bool = True) -> Series:
         """
@@ -7356,7 +7357,7 @@ def _reduce(
 
         if isinstance(delegate, ExtensionArray):
             # dispatch to ExtensionArray interface
-            return delegate._reduce(name, skipna=skipna, **kwds)
+            result = delegate._reduce(name, skipna=skipna, **kwds)
 
         else:
             # dispatch to numpy arrays
@@ -7370,7 +7371,10 @@ def _reduce(
                     f"Series.{name} does not allow {kwd_name}={numeric_only} "
                     "with non-numeric dtypes."
                 )
-            return op(delegate, skipna=skipna, **kwds)
+            result = op(delegate, skipna=skipna, **kwds)
+
+        result = maybe_unbox_numpy_scalar(result)
+        return result
 
     @Appender(make_doc("any", ndim=1))
     # error: Signature of "any" incompatible with supertype "NDFrame"
diff --git a/pandas/tests/arrays/boolean/test_reduction.py b/pandas/tests/arrays/boolean/test_reduction.py
index 696ae1df4c9fd..6770bf4e5b446 100644
--- a/pandas/tests/arrays/boolean/test_reduction.py
+++ b/pandas/tests/arrays/boolean/test_reduction.py
@@ -25,12 +25,15 @@ def data():
         ([False, False], False, False, False, False),
     ],
 )
-def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
+def test_any_all(
+    values, exp_any, exp_all, exp_any_noskip, exp_all_noskip, using_python_scalars
+):
-    # the methods return numpy scalars
+    # the methods return numpy scalars unless future.python_scalars is enabled
-    exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any)
-    exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all)
-    exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip)
-    exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip)
+    if not using_python_scalars:
+        exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any)
+        exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all)
+        exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip)
+        exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip)
 
     for con in [pd.array, pd.Series]:
         a = con(values, dtype="boolean")
@@ -39,23 +42,36 @@ def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
         assert a.any(skipna=False) is exp_any_noskip
         assert a.all(skipna=False) is exp_all_noskip
 
-        assert np.any(a.any()) is exp_any
-        assert np.all(a.all()) is exp_all
+        if not using_python_scalars:
+            # np.any/np.all hand back NumPy bools, so the identity checks only
+            #  apply while the reductions themselves return NumPy scalars
+            assert np.any(a.any()) is exp_any
+            assert np.all(a.all()) is exp_all
+
 
-
-def test_reductions_return_types(dropna, data, all_numeric_reductions):
+def test_reductions_return_types(
+    dropna, data, all_numeric_reductions, using_python_scalars
+):
     op = all_numeric_reductions
     s = pd.Series(data)
     if dropna:
         s = s.dropna()
 
-    if op in ("sum", "prod"):
-        assert isinstance(getattr(s, op)(), np.int_)
-    elif op == "count":
-        # Oddly on the 32 bit build (but not Windows), this is intc (!= intp)
-        assert isinstance(getattr(s, op)(), np.integer)
-    elif op in ("min", "max"):
-        assert isinstance(getattr(s, op)(), np.bool_)
+    if using_python_scalars:
+        expected = {
+            "sum": int,
+            "prod": int,
+            "count": int,
+            "min": bool,
+            "max": bool,
+        }.get(op, float)
     else:
-        # "mean", "std", "var", "median", "kurt", "skew"
-        assert isinstance(getattr(s, op)(), np.float64)
+        expected = {
+            "sum": np.int_,
+            "prod": np.int_,
+            "count": np.integer,
+            "min": np.bool_,
+            "max": np.bool_,
+        }.get(op, np.float64)
+    result = getattr(s, op)()
+    assert isinstance(result, expected), f"{type(result)} vs {expected}"
diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py
index e03e8f30197b9..9d4dc81847188 100644
--- a/pandas/tests/arrays/floating/test_function.py
+++ b/pandas/tests/arrays/floating/test_function.py
@@ -171,7 +171,7 @@ def test_floating_array_numpy_sum(values, expected):
 
 
 @pytest.mark.parametrize("op", ["sum", "min", "max", "prod"])
-def test_preserve_dtypes(op):
+def test_preserve_dtypes(op, using_python_scalars):
     df = pd.DataFrame(
         {
             "A": ["a", "b", "b"],
@@ -182,7 +182,10 @@ def test_preserve_dtypes(op):
 
     # op
     result = getattr(df.C, op)()
-    assert isinstance(result, np.float64)
+    if using_python_scalars:
+        assert isinstance(result, float)
+    else:
+        assert isinstance(result, np.float64)
 
     # groupby
     result = getattr(df.groupby("A"), op)()
diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py
index 1b4f070d47e4e..8b832b8dd151e 100644
--- a/pandas/tests/arrays/integer/test_dtypes.py
+++ b/pandas/tests/arrays/integer/test_dtypes.py
@@ -22,7 +22,7 @@ def test_dtypes(dtype):
 
 
 @pytest.mark.parametrize("op", ["sum", "min", "max", "prod"])
-def test_preserve_dtypes(op):
+def test_preserve_dtypes(op, using_python_scalars):
     # for ops that enable (mean would actually work here
     # but generally it is a float return value)
     df = pd.DataFrame(
@@ -35,7 +35,7 @@ def test_preserve_dtypes(op):
 
     # op
     result = getattr(df.C, op)()
-    if op in {"sum", "prod", "min", "max"}:
+    if op in {"sum", "prod", "min", "max"} and not using_python_scalars:
         assert isinstance(result, np.int64)
     else:
         assert isinstance(result, int)
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 7c4ce4c67f13d..7ee84a3768043 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -1298,7 +1298,7 @@ def wrapper(x):
             assert r0.all()
             assert r1.all()
 
-    def test_any_all_extra(self):
+    def test_any_all_extra(self, using_python_scalars):
         df = DataFrame(
             {
                 "A": [True, False, False],
@@ -1322,13 +1322,19 @@ def test_any_all_extra(self):
         tm.assert_series_equal(result, expected)
 
         # Axis is None
-        result = df.all(axis=None).item()
+        result = df.all(axis=None)
+        if not using_python_scalars:
+            result = result.item()
         assert result is False
 
-        result = df.any(axis=None).item()
+        result = df.any(axis=None)
+        if not using_python_scalars:
+            result = result.item()
         assert result is True
 
-        result = df[["C"]].all(axis=None).item()
+        result = df[["C"]].all(axis=None)
+        if not using_python_scalars:
+            result = result.item()
         assert result is True
 
     @pytest.mark.parametrize("axis", [0, 1])
@@ -1436,7 +1442,7 @@ def test_any_all_bool_only(self):
             ),
         ],
     )
-    def test_any_all_np_func(self, func, data, expected):
+    def test_any_all_np_func(self, func, data, expected, using_python_scalars):
         # GH 19976
         data = DataFrame(data)
 
@@ -1463,20 +1469,30 @@ def test_any_all_np_func(self, func, data, expected):
 
         elif data.dtypes.apply(lambda x: x != "category").any():
             result = func(data)
-            assert isinstance(result, np.bool_)
-            assert result.item() is expected
+            if using_python_scalars:
+                assert result is expected
+            else:
+                assert isinstance(result, np.bool_)
+                assert result.item() is expected
 
             # method version
             result = getattr(DataFrame(data), func.__name__)(axis=None)
-            assert isinstance(result, np.bool_)
-            assert result.item() is expected
+            if using_python_scalars:
+                assert result is expected
+            else:
+                assert isinstance(result, np.bool_)
+                assert result.item() is expected
 
-    def test_any_all_object(self):
+    def test_any_all_object(self, using_python_scalars):
         # GH 19976
-        result = np.all(DataFrame(columns=["a", "b"])).item()
+        result = np.all(DataFrame(columns=["a", "b"]))
+        if not using_python_scalars:
+            result = result.item()
         assert result is True
 
-        result = np.any(DataFrame(columns=["a", "b"])).item()
+        result = np.any(DataFrame(columns=["a", "b"]))
+        if not using_python_scalars:
+            result = result.item()
         assert result is False
 
     def test_any_all_object_bool_only(self):
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 529f9564ff83e..0abf5f90abcc2 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -1511,7 +1511,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun
 
 @pytest.mark.parametrize("observed", [False, None])
 def test_dataframe_groupby_on_2_categoricals_when_observed_is_false(
-    reduction_func, observed
+    reduction_func, observed, using_python_scalars
 ):
     # GH 23865
     # GH 27075
@@ -1553,7 +1553,9 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false(
 
     expected = _results_for_groupbys_with_missing_categories[reduction_func]
 
-    if expected is np.nan:
+    if using_python_scalars and reduction_func == "size":
+        assert (res.loc[unobserved_cats] == expected).all() is True
+    elif expected is np.nan:
         assert res.loc[unobserved_cats].isnull().all().all()
     else:
         assert (res.loc[unobserved_cats] == expected).all().all()
diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py
index 86d0ca1280596..8004e97698b67 100644
--- a/pandas/tests/indexes/test_numpy_compat.py
+++ b/pandas/tests/indexes/test_numpy_compat.py
@@ -151,7 +151,7 @@ def test_numpy_ufuncs_other(index, func):
 
 
 @pytest.mark.parametrize("func", [np.maximum, np.minimum])
-def test_numpy_ufuncs_reductions(index, func, request):
+def test_numpy_ufuncs_reductions(index, func):
     # TODO: overlap with tests.series.test_ufunc.test_reductions
     if len(index) == 0:
         pytest.skip("Test doesn't make sense for empty index.")
diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index db27572b9da26..6cfeade5a255b 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -1268,11 +1268,15 @@ def test_sum_uint64(self):
         expected = np.uint64(10000000000000000000)
         tm.assert_almost_equal(result, expected)
 
-    def test_signedness_preserved_after_sum(self):
+    def test_signedness_preserved_after_sum(self, using_python_scalars):
         # GH 37491
         ser = Series([1, 2, 3, 4])
 
-        assert ser.astype("uint8").sum().dtype == "uint64"
+        result = ser.astype("uint8").sum()
+        if using_python_scalars:
+            assert isinstance(result, int)
+        else:
+            assert result.dtype == "uint64"
 
 
 class TestDatetime64SeriesReductions:
diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py
index 4af1ca1d4800a..6aeaae51b107b 100644
--- a/pandas/tests/reductions/test_stat_reductions.py
+++ b/pandas/tests/reductions/test_stat_reductions.py
@@ -228,7 +228,7 @@ def test_sem(self):
         result = s.sem(ddof=1)
         assert pd.isna(result)
 
-    def test_skew(self):
+    def test_skew(self, using_python_scalars):
         sp_stats = pytest.importorskip("scipy.stats")
 
         string_series = Series(range(20), dtype=np.float64, name="series")
@@ -247,7 +247,10 @@ def test_skew(self):
                 assert np.isnan(df.skew()).all()
             else:
                 assert 0 == s.skew()
-                assert isinstance(s.skew(), np.float64)  # GH53482
+                if using_python_scalars:
+                    assert isinstance(s.skew(), float)
+                else:
+                    assert isinstance(s.skew(), np.float64)  # GH53482
                 assert (df.skew() == 0).all()
 
     def test_kurt(self):
@@ -258,7 +261,7 @@ def test_kurt(self):
         alt = lambda x: sp_stats.kurtosis(x, bias=False)
         self._check_stat_op("kurt", alt, string_series)
 
-    def test_kurt_corner(self):
+    def test_kurt_corner(self, using_python_scalars):
         # test corner cases, kurt() returns NaN unless there's at least 4
         # values
         min_N = 4
@@ -270,5 +273,8 @@ def test_kurt_corner(self):
                 assert np.isnan(df.kurt()).all()
             else:
                 assert 0 == s.kurt()
-                assert isinstance(s.kurt(), np.float64)  # GH53482
+                if using_python_scalars:
+                    assert isinstance(s.kurt(), float)
+                else:
+                    assert isinstance(s.kurt(), np.float64)  # GH53482
                 assert (df.kurt() == 0).all()
diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py
index 5faacbb5559a9..7615510268a7a 100644
--- a/pandas/tests/series/test_ufunc.py
+++ b/pandas/tests/series/test_ufunc.py
@@ -330,7 +330,7 @@ def test_add(self, values_for_np_reduce, box_with_array):
             with pytest.raises(TypeError, match=msg):
                 np.add.reduce(obj)
 
-    def test_max(self, values_for_np_reduce, box_with_array):
+    def test_max(self, values_for_np_reduce, box_with_array, using_python_scalars):
         box = box_with_array
         values = values_for_np_reduce
 
@@ -349,12 +349,14 @@ def test_max(self, values_for_np_reduce, box_with_array):
             tm.assert_series_equal(result, expected)
         else:
             expected = values[1]
+            if using_python_scalars and values.dtype.kind in ["i", "f"]:
+                expected = expected.item()
             assert result == expected
             if same_type:
                 # check we have e.g. Timestamp instead of dt64
                 assert type(result) == type(expected)
 
-    def test_min(self, values_for_np_reduce, box_with_array):
+    def test_min(self, values_for_np_reduce, box_with_array, using_python_scalars):
         box = box_with_array
         values = values_for_np_reduce
 
@@ -372,6 +374,8 @@ def test_min(self, values_for_np_reduce, box_with_array):
             tm.assert_series_equal(result, expected)
         else:
             expected = values[0]
+            if using_python_scalars and values.dtype.kind in ["i", "f"]:
+                expected = expected.item()
             assert result == expected
             if same_type:
                 # check we have e.g. Timestamp instead of dt64
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
index 6788f2056bb9a..1b84a3e578ead 100644
--- a/pandas/tests/test_nanops.py
+++ b/pandas/tests/test_nanops.py
@@ -1274,17 +1274,20 @@ def test_check_bottleneck_disallow(any_real_numpy_dtype, func):
 
 
 @pytest.mark.parametrize("val", [2**55, -(2**55), 20150515061816532])
-def test_nanmean_overflow(disable_bottleneck, val):
+def test_nanmean_overflow(disable_bottleneck, val, using_python_scalars):
     # GH 10155
     # In the previous implementation mean can overflow for int dtypes, it
     # is now consistent with numpy
 
     ser = Series(val, index=range(500), dtype=np.int64)
     result = ser.mean()
-    np_result = ser.values.mean()
     assert result == val
-    assert result == np_result
-    assert result.dtype == np.float64
+    if using_python_scalars:
+        assert isinstance(result, float)
+    else:
+        np_result = ser.values.mean()
+        assert result == np_result
+        assert result.dtype == np.float64
 
 
 @pytest.mark.parametrize(
@@ -1299,13 +1302,18 @@ def test_nanmean_overflow(disable_bottleneck, val):
     ],
 )
 @pytest.mark.parametrize("method", ["mean", "std", "var", "skew", "kurt", "min", "max"])
-def test_returned_dtype(disable_bottleneck, dtype, method):
+def test_returned_dtype(disable_bottleneck, dtype, method, using_python_scalars):
     if dtype is None:
         pytest.skip("np.float128 not available")
 
     ser = Series(range(10), dtype=dtype)
     result = getattr(ser, method)()
-    if is_integer_dtype(dtype) and method not in ["min", "max"]:
+    if using_python_scalars:
+        if is_integer_dtype(dtype) and method in ["min", "max"]:
+            assert isinstance(result, int)
+        else:
+            assert isinstance(result, float)
+    elif is_integer_dtype(dtype) and method not in ["min", "max"]:
         assert result.dtype == np.float64
     else:
         assert result.dtype == dtype
diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py
index 49dee50954f4f..b24e1f822e998 100644
--- a/pandas/tests/window/moments/test_moments_consistency_ewm.py
+++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py
@@ -133,7 +133,10 @@ def test_moments_consistency_var(all_data, adjust, ignore_na, min_periods, bias)
     var_x = all_data.ewm(
         com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
     ).var(bias=bias)
-    assert not (var_x < 0).any().any()
+    if isinstance(all_data, Series):
+        assert not (var_x < 0).any()
+    else:
+        assert not (var_x < 0).any().any()
 
     if bias:
         # check that biased var(x) == mean(x^2) - mean(x)^2
@@ -156,7 +159,10 @@ def test_moments_consistency_var_constant(
     ).var(bias=bias)
 
     # check that variance of constant series is identically 0
-    assert not (var_x > 0).any().any()
+    if isinstance(consistent_data, Series):
+        assert not (var_x > 0).any()
+    else:
+        assert not (var_x > 0).any().any()
     expected = consistent_data * np.nan
     expected[count_x >= max(min_periods, 1)] = 0.0
     if not bias:
@@ -170,12 +176,18 @@ def test_ewm_consistency_std(all_data, adjust, ignore_na, min_periods, bias):
     var_x = all_data.ewm(
         com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
     ).var(bias=bias)
-    assert not (var_x < 0).any().any()
+    if isinstance(all_data, Series):
+        assert not (var_x < 0).any()
+    else:
+        assert not (var_x < 0).any().any()
 
     std_x = all_data.ewm(
         com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
     ).std(bias=bias)
-    assert not (std_x < 0).any().any()
+    if isinstance(all_data, Series):
+        assert not (std_x < 0).any()
+    else:
+        assert not (std_x < 0).any().any()
 
     # check that var(x) == std(x)^2
     tm.assert_equal(var_x, std_x * std_x)
@@ -183,7 +195,10 @@ def test_ewm_consistency_std(all_data, adjust, ignore_na, min_periods, bias):
     cov_x_x = all_data.ewm(
         com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
     ).cov(all_data, bias=bias)
-    assert not (cov_x_x < 0).any().any()
+    if isinstance(all_data, Series):
+        assert not (cov_x_x < 0).any()
+    else:
+        assert not (cov_x_x < 0).any().any()
 
     # check that var(x) == cov(x, x)
     tm.assert_equal(var_x, cov_x_x)
diff --git a/pandas/tests/window/moments/test_moments_consistency_expanding.py b/pandas/tests/window/moments/test_moments_consistency_expanding.py
index 7d2fa1ad5d211..894220d831300 100644
--- a/pandas/tests/window/moments/test_moments_consistency_expanding.py
+++ b/pandas/tests/window/moments/test_moments_consistency_expanding.py
@@ -6,11 +6,17 @@
 
 
 def no_nans(x):
-    return x.notna().all().all()
+    # One .all() collapses a Series to a scalar; anything else (e.g. a
+    #  DataFrame) needs a second .all() over that intermediate result.
+    reduced = x.notna().all()
+    return reduced if isinstance(x, Series) else reduced.all()
 
 
 def all_na(x):
-    return x.isnull().all().all()
+    # A single .all() already yields a scalar for a Series; other inputs
+    #  (e.g. a DataFrame) are reduced once more.
+    reduced = x.isnull().all()
+    return reduced if isinstance(x, Series) else reduced.all()
 
 
 @pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum])
@@ -32,7 +38,10 @@ def test_expanding_apply_consistency_sum_nans(request, all_data, min_periods, f)
 @pytest.mark.parametrize("ddof", [0, 1])
 def test_moments_consistency_var(all_data, min_periods, ddof):
     var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof)
-    assert not (var_x < 0).any().any()
+    if isinstance(all_data, Series):
+        assert not (var_x < 0).any()
+    else:
+        assert not (var_x < 0).any().any()
 
     if ddof == 0:
         # check that biased var(x) == mean(x^2) - mean(x)^2
@@ -47,7 +56,10 @@ def test_moments_consistency_var_constant(consistent_data, min_periods, ddof):
     var_x = consistent_data.expanding(min_periods=min_periods).var(ddof=ddof)
 
     # check that variance of constant series is identically 0
-    assert not (var_x > 0).any().any()
+    if isinstance(consistent_data, Series):
+        assert not (var_x > 0).any()
+    else:
+        assert not (var_x > 0).any().any()
     expected = consistent_data * np.nan
     expected[count_x >= max(min_periods, 1)] = 0.0
     if ddof == 1:
@@ -58,16 +70,25 @@ def test_moments_consistency_var_constant(consistent_data, min_periods, ddof):
 @pytest.mark.parametrize("ddof", [0, 1])
 def test_expanding_consistency_var_std_cov(all_data, min_periods, ddof):
     var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof)
-    assert not (var_x < 0).any().any()
+    if isinstance(all_data, Series):
+        assert not (var_x < 0).any()
+    else:
+        assert not (var_x < 0).any().any()
 
     std_x = all_data.expanding(min_periods=min_periods).std(ddof=ddof)
-    assert not (std_x < 0).any().any()
+    if isinstance(all_data, Series):
+        assert not (std_x < 0).any()
+    else:
+        assert not (std_x < 0).any().any()
 
     # check that var(x) == std(x)^2
     tm.assert_equal(var_x, std_x * std_x)
 
     cov_x_x = all_data.expanding(min_periods=min_periods).cov(all_data, ddof=ddof)
-    assert not (cov_x_x < 0).any().any()
+    if isinstance(all_data, Series):
+        assert not (cov_x_x < 0).any()
+    else:
+        assert not (cov_x_x < 0).any().any()
 
     # check that var(x) == cov(x, x)
     tm.assert_equal(var_x, cov_x_x)
diff --git a/pandas/tests/window/moments/test_moments_consistency_rolling.py b/pandas/tests/window/moments/test_moments_consistency_rolling.py
index be22338c00cb2..80d02eb51199d 100644
--- a/pandas/tests/window/moments/test_moments_consistency_rolling.py
+++ b/pandas/tests/window/moments/test_moments_consistency_rolling.py
@@ -1,16 +1,24 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_python_scalars
+
 from pandas import Series
 import pandas._testing as tm
 
 
 def no_nans(x):
-    return x.notna().all().all()
+    reduced = x.notna().all()
+    if using_python_scalars() and isinstance(x, Series):
+        return reduced  # expected to be a plain bool, which has no .all()
+    return reduced.all()
 
 
 def all_na(x):
-    return x.isnull().all().all()
+    reduced = x.isnull().all()
+    if using_python_scalars() and isinstance(x, Series):
+        return reduced  # expected to be a plain bool, which has no .all()
+    return reduced.all()
 
 
 @pytest.fixture(params=[(1, 0), (5, 1)])
@@ -48,7 +56,10 @@ def test_moments_consistency_var(all_data, rolling_consistency_cases, center, dd
     var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var(
         ddof=ddof
     )
-    assert not (var_x < 0).any().any()
+    if isinstance(all_data, Series):
+        assert not (var_x < 0).any()
+    else:
+        assert not (var_x < 0).any().any()
 
     if ddof == 0:
         # check that biased var(x) == mean(x^2) - mean(x)^2
@@ -77,7 +88,10 @@ def test_moments_consistency_var_constant(
     ).var(ddof=ddof)
 
     # check that variance of constant series is identically 0
-    assert not (var_x > 0).any().any()
+    if isinstance(consistent_data, Series):
+        assert not (var_x > 0).any()
+    else:
+        assert not (var_x > 0).any().any()
     expected = consistent_data * np.nan
     expected[count_x >= max(min_periods, 1)] = 0.0
     if ddof == 1:
@@ -94,12 +108,18 @@ def test_rolling_consistency_var_std_cov(
     var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var(
         ddof=ddof
     )
-    assert not (var_x < 0).any().any()
+    if isinstance(all_data, Series):
+        assert not (var_x < 0).any()
+    else:
+        assert not (var_x < 0).any().any()
 
     std_x = all_data.rolling(window=window, min_periods=min_periods, center=center).std(
         ddof=ddof
     )
-    assert not (std_x < 0).any().any()
+    if isinstance(all_data, Series):
+        assert not (std_x < 0).any()
+    else:
+        assert not (std_x < 0).any().any()
 
     # check that var(x) == std(x)^2
     tm.assert_equal(var_x, std_x * std_x)
@@ -107,7 +127,10 @@ def test_rolling_consistency_var_std_cov(
     cov_x_x = all_data.rolling(
         window=window, min_periods=min_periods, center=center
     ).cov(all_data, ddof=ddof)
-    assert not (cov_x_x < 0).any().any()
+    if isinstance(all_data, Series):
+        assert not (cov_x_x < 0).any()
+    else:
+        assert not (cov_x_x < 0).any().any()
 
     # check that var(x) == cov(x, x)
     tm.assert_equal(var_x, cov_x_x)