Skip to content

Commit

Permalink
BUG: groupby.describe with as_index=False incorrect (#49643)
Browse files Browse the repository at this point in the history
* BUG: groupby.describe with as_index=False incorrect

* Add test for two groupings

* Simplify logic
  • Loading branch information
rhshadrach committed Nov 18, 2022
1 parent 04c33d3 commit 68e2c2a
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 31 deletions.
4 changes: 3 additions & 1 deletion doc/source/whatsnew/v2.0.0.rst
Expand Up @@ -718,7 +718,9 @@ Groupby/resample/rolling
- Bug in :class:`.DataFrameGroupBy` and :class:`.SeriesGroupBy` would not include unobserved categories in result when grouping by categorical indexes (:issue:`49354`)
- Bug in :class:`.DataFrameGroupBy` and :class:`.SeriesGroupBy` would change result order depending on the input index when grouping by categoricals (:issue:`49223`)
- Bug in :class:`.DataFrameGroupBy` and :class:`.SeriesGroupBy` when grouping on categorical data would sort result values even when used with ``sort=False`` (:issue:`42482`)
-
- Bug in :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` with ``as_index=False`` would not attempt the computation without using the grouping keys when using them failed with a ``TypeError`` (:issue:`49256`)
- Bug in :meth:`.DataFrameGroupBy.describe` would describe the group keys (:issue:`49256`)
- Bug in :meth:`.SeriesGroupBy.describe` with ``as_index=False`` would have the incorrect shape (:issue:`49256`)

Reshaping
^^^^^^^^^
Expand Down
12 changes: 9 additions & 3 deletions pandas/core/groupby/groupby.py
Expand Up @@ -1061,8 +1061,7 @@ def _set_group_selection(self) -> None:
# This is a no-op for SeriesGroupBy
grp = self.grouper
if not (
self.as_index
and grp.groupings is not None
grp.groupings is not None
and self.obj.ndim > 1
and self._group_selection is None
):
Expand Down Expand Up @@ -2640,7 +2639,14 @@ def describe(self, **kwargs):
)
if self.axis == 1:
return result.T
return result.unstack()

# GH#49256 - properly handle the grouping column(s)
if self._selected_obj.ndim != 1 or self.as_index:
result = result.unstack()
if not self.as_index:
self._insert_inaxis_grouper_inplace(result)

return result

@final
def resample(self, rule, *args, **kwargs):
Expand Down
12 changes: 9 additions & 3 deletions pandas/tests/groupby/test_apply.py
Expand Up @@ -974,15 +974,21 @@ def test_apply_function_index_return(function):


def test_apply_function_with_indexing_return_column():
# GH#7002, GH#41480
# GH#7002, GH#41480, GH#49256
df = DataFrame(
{
"foo1": ["one", "two", "two", "three", "one", "two"],
"foo2": [1, 2, 4, 4, 5, 6],
}
)
with pytest.raises(TypeError, match="Could not convert"):
df.groupby("foo1", as_index=False).apply(lambda x: x.mean())
result = df.groupby("foo1", as_index=False).apply(lambda x: x.mean())
expected = DataFrame(
{
"foo1": ["one", "three", "two"],
"foo2": [3.0, 4.0, 4.0],
}
)
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
Expand Down
73 changes: 49 additions & 24 deletions pandas/tests/groupby/test_function.py
Expand Up @@ -336,13 +336,7 @@ def test_describe(self, df, gb, gni):
result = gb.describe()
tm.assert_frame_equal(result, expected)

expected = pd.concat(
[
df[df.A == 1].describe().unstack().to_frame().T,
df[df.A == 3].describe().unstack().to_frame().T,
]
)
expected.index = Index([0, 1])
expected = expected.reset_index()
result = gni.describe()
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -1093,6 +1087,38 @@ def test_series_describe_single():
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("keys", ["key1", ["key1", "key2"]])
def test_series_describe_as_index(as_index, keys):
    # GH#49256
    # key1 and key2 carry identical labels, so grouping by one key or by both
    # yields the same three groups.
    labels = ["one", "two", "two", "three", "two"]
    frame = DataFrame({"key1": labels, "key2": labels, "foo2": [1, 2, 4, 4, 6]})

    result = frame.groupby(keys, as_index=as_index)["foo2"].describe()

    # Assemble the expected columns in order: grouping key(s) first, then the
    # describe statistics.
    group_labels = ["one", "three", "two"]
    columns = {"key1": group_labels}
    if len(keys) == 2:
        columns["key2"] = group_labels
    columns.update(
        {
            "count": [1.0, 1.0, 3.0],
            "mean": [1.0, 4.0, 4.0],
            "std": [np.nan, np.nan, 2.0],
            "min": [1.0, 4.0, 2.0],
            "25%": [1.0, 4.0, 3.0],
            "50%": [1.0, 4.0, 4.0],
            "75%": [1.0, 4.0, 5.0],
            "max": [1.0, 4.0, 6.0],
        }
    )
    expected = DataFrame(columns)
    if as_index:
        # With as_index=True the grouping key(s) form the index instead of
        # appearing as leading columns.
        expected = expected.set_index(keys)
    tm.assert_frame_equal(result, expected)


def test_series_index_name(df):
grouped = df.loc[:, ["C"]].groupby(df["A"])
result = grouped.agg(lambda x: x.mean())
Expand Down Expand Up @@ -1177,29 +1203,25 @@ def test_frame_describe_unstacked_format():
"pandas.errors.PerformanceWarning"
)
@pytest.mark.parametrize("as_index", [True, False])
def test_describe_with_duplicate_output_column_names(as_index):
@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]])
def test_describe_with_duplicate_output_column_names(as_index, keys):
# GH 35314
df = DataFrame(
{
"a": [99, 99, 99, 88, 88, 88],
"a1": [99, 99, 99, 88, 88, 88],
"a2": [99, 99, 99, 88, 88, 88],
"b": [1, 2, 3, 4, 5, 6],
"c": [10, 20, 30, 40, 50, 60],
},
columns=["a", "b", "b"],
columns=["a1", "a2", "b", "b"],
copy=False,
)
if keys == ["a1"]:
df = df.drop(columns="a2")

expected = (
DataFrame.from_records(
[
("a", "count", 3.0, 3.0),
("a", "mean", 88.0, 99.0),
("a", "std", 0.0, 0.0),
("a", "min", 88.0, 99.0),
("a", "25%", 88.0, 99.0),
("a", "50%", 88.0, 99.0),
("a", "75%", 88.0, 99.0),
("a", "max", 88.0, 99.0),
("b", "count", 3.0, 3.0),
("b", "mean", 5.0, 2.0),
("b", "std", 1.0, 1.0),
Expand All @@ -1222,14 +1244,17 @@ def test_describe_with_duplicate_output_column_names(as_index):
.T
)
expected.columns.names = [None, None]
expected.index = Index([88, 99], name="a")

if as_index:
expected = expected.drop(columns=["a"], level=0)
if len(keys) == 2:
expected.index = MultiIndex(
levels=[[88, 99], [88, 99]], codes=[[0, 1], [0, 1]], names=["a1", "a2"]
)
else:
expected = expected.reset_index(drop=True)
expected.index = Index([88, 99], name="a1")

if not as_index:
expected = expected.reset_index()

result = df.groupby("a", as_index=as_index).describe()
result = df.groupby(keys, as_index=as_index).describe()

tm.assert_frame_equal(result, expected)

Expand Down

0 comments on commit 68e2c2a

Please sign in to comment.