diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst index 00409cf963ab3..46080e1847317 100644 --- a/doc/source/whatsnew/v1.3.3.rst +++ b/doc/source/whatsnew/v1.3.3.rst @@ -17,6 +17,7 @@ Fixed regressions - Fixed regression in :class:`DataFrame` constructor failing to broadcast for defined :class:`Index` and len one list of :class:`Timestamp` (:issue:`42810`) - Performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`) - Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`) +- Fixed regression in :meth:`.GroupBy.quantile` which was failing for nullable float data containing ``pandas.NA`` (:issue:`42849`) - Fixed regression in :meth:`merge` where ``on`` columns with ``ExtensionDtype`` or ``bool`` data types were cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`) - Fixed regression in :meth:`RangeIndex.where` and :meth:`RangeIndex.putmask` raising ``AssertionError`` when result did not represent a :class:`RangeIndex` (:issue:`43240`) - Fixed regression in :meth:`read_parquet` where the ``fastparquet`` engine would not work properly with fastparquet 0.7.0 (:issue:`43075`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 42de5137ba670..79ee71ddb1047 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -63,6 +63,7 @@ class providing the base-class of operations. 
from pandas.core.dtypes.common import ( is_bool_dtype, is_datetime64_dtype, + is_float_dtype, is_integer_dtype, is_numeric_dtype, is_object_dtype, @@ -2453,6 +2454,9 @@ def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, np.dtype | None]: elif is_timedelta64_dtype(vals.dtype): inference = np.dtype("timedelta64[ns]") out = np.asarray(vals).astype(float) + elif isinstance(vals, ExtensionArray) and is_float_dtype(vals): + inference = np.dtype(np.float64) + out = vals.to_numpy(dtype=float, na_value=np.nan) else: out = np.asarray(vals) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 90437b9139594..83d6c20bcac24 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -248,6 +248,45 @@ def test_groupby_quantile_skips_invalid_dtype(q): tm.assert_frame_equal(result, expected) +def test_groupby_quantile_NA_float(any_float_dtype): + # GH#42849 + df = DataFrame({"x": [1, 1], "y": [0.2, np.nan]}, dtype=any_float_dtype) + result = df.groupby("x")["y"].quantile(0.5) + expected = pd.Series([0.2], dtype=float, index=[1.0], name="y") + expected.index.name = "x" + tm.assert_series_equal(expected, result) + + result = df.groupby("x")["y"].quantile([0.5, 0.75]) + expected = pd.Series( + [0.2] * 2, + index=pd.MultiIndex.from_product(([1.0], [0.5, 0.75]), names=["x", None]), + name="y", + ) + tm.assert_series_equal(result, expected) + + +def test_groupby_quantile_NA_int(any_int_ea_dtype): + # GH#42849 + df = DataFrame({"x": [1, 1], "y": [2, 5]}, dtype=any_int_ea_dtype) + result = df.groupby("x")["y"].quantile(0.5) + expected = pd.Series([3.5], dtype=float, index=Index([1], name="x"), name="y") + tm.assert_series_equal(expected, result) + + result = df.groupby("x").quantile(0.5) + expected = DataFrame({"y": 3.5}, index=Index([1], name="x")) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["Float64", "Float32"]) +def test_groupby_quantile_allNA_column(dtype): + # 
GH#42849 + df = DataFrame({"x": [1, 1], "y": [pd.NA] * 2}, dtype=dtype) + result = df.groupby("x")["y"].quantile(0.5) + expected = pd.Series([np.nan], dtype=float, index=[1.0], name="y") + expected.index.name = "x" + tm.assert_series_equal(expected, result) + + def test_groupby_timedelta_quantile(): # GH: 29485 df = DataFrame(