Skip to content

Commit

Permalink
Backport PR #43150 on branch 1.3.x (BUG: GroupBy.quantile fails with …
Browse files Browse the repository at this point in the history
…pd.NA) (#43417)
  • Loading branch information
debnathshoham committed Sep 9, 2021
1 parent ac09649 commit 5d6e352
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Fixed regressions
- Fixed regression in :class:`DataFrame` constructor failing to broadcast for defined :class:`Index` and len one list of :class:`Timestamp` (:issue:`42810`)
- Performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`)
- Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`)
- Fixed regression in :meth:`.GroupBy.quantile` which was failing with ``pandas.NA`` (:issue:`42849`)
- Fixed regression in :meth:`.GroupBy.apply` where ``nan`` values were dropped even with ``dropna=False`` (:issue:`43205`)
- Fixed regression in :meth:`merge` where ``on`` columns with ``ExtensionDtype`` or ``bool`` data types were cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`)
- Fixed regression in :meth:`RangeIndex.where` and :meth:`RangeIndex.putmask` raising ``AssertionError`` when result did not represent a :class:`RangeIndex` (:issue:`43240`)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class providing the base-class of operations.
from pandas.core.dtypes.common import (
is_bool_dtype,
is_datetime64_dtype,
is_float_dtype,
is_integer_dtype,
is_numeric_dtype,
is_object_dtype,
Expand Down Expand Up @@ -2450,6 +2451,9 @@ def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, np.dtype | None]:
elif is_timedelta64_dtype(vals.dtype):
inference = np.dtype("timedelta64[ns]")
out = np.asarray(vals).astype(float)
elif isinstance(vals, ExtensionArray) and is_float_dtype(vals):
inference = np.dtype(np.float64)
out = vals.to_numpy(dtype=float, na_value=np.nan)
else:
out = np.asarray(vals)

Expand Down
41 changes: 41 additions & 0 deletions pandas/tests/groupby/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,47 @@ def test_groupby_quantile_skips_invalid_dtype(q):
tm.assert_frame_equal(result, expected)


def test_groupby_quantile_NA_float(any_float_allowed_nullable_dtype):
    # GH#42849
    # The single group holds one valid value (0.2) and one missing value, so
    # every requested quantile must ignore the missing entry and equal 0.2.
    # NOTE(review): the fixture presumably covers both numpy float dtypes and
    # the nullable "Float32"/"Float64" dtypes -- confirm against conftest.
    df = DataFrame(
        {"x": [1, 1], "y": [0.2, np.nan]}, dtype=any_float_allowed_nullable_dtype
    )

    # Scalar quantile: one row per group, indexed by the group key.
    result = df.groupby("x")["y"].quantile(0.5)
    expected = pd.Series([0.2], dtype=float, index=Index(df["x"][:1]), name="y")
    # Pass ``result`` first so the left/right labels in a failure message
    # match the convention used by the second assertion below.
    tm.assert_series_equal(result, expected)

    # List of quantiles: the result gains an unnamed inner index level that
    # holds the requested quantile values.
    result = df.groupby("x")["y"].quantile([0.5, 0.75])
    expected = pd.Series(
        [0.2] * 2,
        index=pd.MultiIndex.from_arrays(
            [Index(df["x"]), [0.5, 0.75]], names=["x", None]
        ),
        name="y",
    )
    tm.assert_series_equal(result, expected)


def test_groupby_quantile_NA_int(any_nullable_int_dtype):
    # GH#42849
    # Both the key column and the value column are built with the nullable
    # integer dtype; the median of [2, 5] is 3.5 and is expected as float.
    df = DataFrame({"x": [1, 1], "y": [2, 5]}, dtype=any_nullable_int_dtype)

    # SeriesGroupBy path.
    result = df.groupby("x")["y"].quantile(0.5)
    expected = pd.Series([3.5], dtype=float, index=Index(df["x"][:1]), name="y")
    # Pass ``result`` first so the left/right labels in a failure message
    # match the convention used by the frame assertion below.
    tm.assert_series_equal(result, expected)

    # DataFrameGroupBy path: same value, returned as a one-column frame.
    result = df.groupby("x").quantile(0.5)
    expected = DataFrame({"y": 3.5}, index=Index(df["x"][:1]))
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("dtype", ["Float64", "Float32"])
def test_groupby_quantile_allNA_column(dtype):
    # GH#42849
    # Every value in the group is pd.NA, so there is nothing to take a
    # quantile of; the expected result is a single float NaN for the group.
    df = DataFrame({"x": [1, 1], "y": [pd.NA] * 2}, dtype=dtype)
    result = df.groupby("x")["y"].quantile(0.5)
    expected = pd.Series([np.nan], dtype=float, index=Index(df["x"][:1]), name="y")
    # Pass ``result`` first so a failure message labels the computed value
    # as "left" and the expectation as "right".
    tm.assert_series_equal(result, expected)


def test_groupby_timedelta_quantile():
# GH: 29485
df = DataFrame(
Expand Down

0 comments on commit 5d6e352

Please sign in to comment.