Skip to content

Commit

Permalink
BUG: Fix calling groupBy(...).apply(func) on an empty dataframe invok…
Browse files Browse the repository at this point in the history
…es func (pandas-dev#48579)
  • Loading branch information
ntachukwu committed Sep 27, 2022
1 parent 0e93caf commit 8b0ad71
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.1.rst
Expand Up @@ -77,6 +77,7 @@ Fixed regressions
- Fixed performance regression in :func:`factorize` when ``na_sentinel`` is not ``None`` and ``sort=False`` (:issue:`48620`)
- Fixed regression causing an ``AttributeError`` during warning emitted if the provided table name in :meth:`DataFrame.to_sql` and the table name actually used in the database do not match (:issue:`48733`)
- Fixed :meth:`.DataFrameGroupBy.size` not returning a Series when ``axis=1`` (:issue:`48738`)
- Fixed regression in :meth:`.DataFrameGroupBy.apply` where the user-defined function was invoked on an empty dataframe (:issue:`47985`)

.. ---------------------------------------------------------------------------
Expand Down
13 changes: 6 additions & 7 deletions pandas/core/groupby/ops.py
Expand Up @@ -787,15 +787,14 @@ def apply(
if not mutated and not _is_indexed_like(res, group_axes, axis):
mutated = True
result_values.append(res)

# getattr pattern for __name__ is needed for functools.partial objects
if len(group_keys) == 0 and getattr(f, "__name__", None) not in [
"idxmin",
"idxmax",
"nanargmin",
"nanargmax",
if len(group_keys) == 0 and getattr(f, "__name__", None) in [
"mad",
"skew",
"sum",
"prod",
]:
# If group_keys is empty, then no function calls have been made,
# If group_keys is empty, then no function calls have been made,
# so we will not have raised even if this is an invalid dtype.
# So do one dummy call here to raise appropriate TypeError.
f(data.iloc[:0])
Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/groupby/test_apply.py
Expand Up @@ -1331,3 +1331,28 @@ def test_result_name_when_one_group(name):
expected = Series([1, 2], name=name)

tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "method, op",
    [
        ("apply", lambda grp: grp.values[-1]),
        ("apply", lambda grp: grp["b"].iloc[0]),
        ("agg", "mad"),
        ("agg", "skew"),
        ("agg", "prod"),
        ("agg", "sum"),
    ],
)
def test_empty_df(method, op):
    """Check groupby ``apply``/``agg`` on an empty frame yields an empty Series.

    An empty two-column DataFrame is grouped by ``"a"``, column ``"b"`` is
    selected, and ``method`` is invoked on that selection with ``op``. The
    result must equal an empty float64 Series named ``"b"`` whose index is an
    empty float64 Index named ``"a"``.
    """
    # GH 47985
    frame = DataFrame({"a": [], "b": []})
    selected = frame.groupby("a", group_keys=True)["b"]

    # Resolve the groupby method ("apply" or "agg") by name, then call it.
    caller = getattr(selected, method)
    result = caller(op)

    empty_index = Index([], dtype="float64", name="a")
    expected = Series([], name="b", dtype="float64", index=empty_index)

    tm.assert_series_equal(result, expected)

0 comments on commit 8b0ad71

Please sign in to comment.