diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 75b4c5c0fe14d..457ab6eb965ca 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1151,6 +1151,7 @@ Groupby/resample/rolling - Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupby.groups` that would not respect groupby argument ``dropna`` (:issue:`55919`) - Bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`) - Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`) +- Bug in :class:`.DataFrameGroupBy` reductions where non-Boolean values were allowed for the ``numeric_only`` argument; passing a non-Boolean value will now raise a ``ValueError`` (:issue:`62778`) - Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`) - Bug in :meth:`.Series.rolling` when used with a :class:`.BaseIndexer` subclass and computing min/max (:issue:`46726`) - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index fe7bf5bbc4c2c..910268ebdfb8a 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -76,6 +76,7 @@ class providing the base-class of operations. ensure_dtype_can_hold_na, ) from pandas.core.dtypes.common import ( + is_bool, is_bool_dtype, is_float_dtype, is_hashable, @@ -109,9 +110,7 @@ class providing the base-class of operations. 
SparseArray, ) from pandas.core.arrays.string_ import StringDtype -from pandas.core.arrays.string_arrow import ( - ArrowStringArray, -) +from pandas.core.arrays.string_arrow import ArrowStringArray from pandas.core.base import ( PandasObject, SelectionMixin, @@ -1756,6 +1755,9 @@ def _cython_agg_general( # Note: we never get here with how="ohlc" for DataFrameGroupBy; # that goes through SeriesGroupBy + if not is_bool(numeric_only): + raise ValueError("numeric_only accepts only Boolean values") + data = self._get_data_to_aggregate(numeric_only=numeric_only, name=how) def array_func(values: ArrayLike) -> ArrayLike: diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 5c8be57b10013..9c13ceec49e35 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -1520,3 +1520,19 @@ def test_groupby_std_datetimelike(): exp_ser = Series([td1 * 2, td1, td1, td1, td4], index=np.arange(5)) expected = DataFrame({"A": exp_ser, "B": exp_ser, "C": exp_ser}) tm.assert_frame_equal(result, expected) + + +def test_mean_numeric_only_validates_bool(): + # GH#62778 + + df = DataFrame({"A": range(5), "B": range(5)}) + + msg = "numeric_only accepts only Boolean values" + with pytest.raises(ValueError, match=msg): + df.groupby(["A"]).mean(["B"]) + + with pytest.raises(ValueError, match=msg): + df.groupby(["A"]).mean(numeric_only="True") + + with pytest.raises(ValueError, match=msg): + df.groupby(["A"]).mean(numeric_only=1)