Skip to content

Commit

Permalink
BUG: DataFrame.agg and apply with 'size' returns a scalar (#39935)
Browse files Browse the repository at this point in the history
  • Loading branch information
rhshadrach committed Feb 24, 2021
1 parent dec7d21 commit 212323f
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 10 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ Numeric
- Bug in :meth:`DataFrame.rank` with ``np.inf`` and mixture of ``np.nan`` and ``np.inf`` (:issue:`32593`)
- Bug in :meth:`DataFrame.rank` with ``axis=0`` and columns holding incomparable types raising ``IndexError`` (:issue:`38932`)
- Bug in :func:`select_dtypes` different behavior between Windows and Linux with ``include="int"`` (:issue:`36569`)
- Bug in :meth:`DataFrame.apply` and :meth:`DataFrame.agg` where passing ``func="size"`` would operate on the entire ``DataFrame`` and return a scalar instead of operating on rows or columns (:issue:`39934`)
-

Conversion
Expand Down
21 changes: 15 additions & 6 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,10 @@ def f(x):
def index(self) -> Index:
return self.obj.index

@property
def agg_axis(self) -> Index:
    """Index obtained from ``self.obj._get_agg_axis`` for ``self.axis``."""
    axis_labels = self.obj._get_agg_axis(self.axis)
    return axis_labels

@abc.abstractmethod
def apply(self) -> FrameOrSeriesUnion:
    """Abstract hook with no implementation here; subclasses supply the body."""
Expand Down Expand Up @@ -541,17 +545,26 @@ def maybe_apply_str(self) -> Optional[FrameOrSeriesUnion]:
f = self.f
if not isinstance(f, str):
return None

obj = self.obj

# TODO: GH 39993 - Avoid special-casing by replacing with lambda
if f == "size" and isinstance(obj, ABCDataFrame):
# Special-cased because DataFrame.size returns a single scalar
value = obj.shape[self.axis]
return obj._constructor_sliced(value, index=self.agg_axis, name="size")

# Support for `frame.transform('method')`
# Some methods (shift, etc.) require the axis argument, others
# don't, so inspect and insert if necessary.
func = getattr(self.obj, f, None)
func = getattr(obj, f, None)
if callable(func):
sig = inspect.getfullargspec(func)
if "axis" in sig.args:
self.kwargs["axis"] = self.axis
elif self.axis != 0:
raise ValueError(f"Operation {f} does not support axis=1")
return self.obj._try_aggregate_string_function(f, *self.args, **self.kwargs)
return obj._try_aggregate_string_function(f, *self.args, **self.kwargs)

def maybe_apply_multiple(self) -> Optional[FrameOrSeriesUnion]:
"""
Expand Down Expand Up @@ -613,10 +626,6 @@ def values(self):
def dtypes(self) -> Series:
return self.obj.dtypes

@property
def agg_axis(self) -> Index:
    """Index obtained from ``self.obj._get_agg_axis`` for ``self.axis``."""
    axis_labels = self.obj._get_agg_axis(self.axis)
    return axis_labels

def apply(self) -> FrameOrSeriesUnion:
""" compute the results """
# dispatch to agg
Expand Down
18 changes: 14 additions & 4 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1415,11 +1415,21 @@ def test_non_callable_aggregates(how):

tm.assert_series_equal(result, expected)

# Just a string attribute arg same as calling df.arg
result = getattr(df, how)("size")
expected = df.size

assert result == expected
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_size_as_str(how, axis):
    """Check ``agg``/``apply`` with the string ``"size"`` along ``axis`` (GH 39934)."""
    frame = DataFrame(
        {"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]}
    )
    n_rows, n_cols = frame.shape
    # Along the index the expected value is the row count, labelled by the
    # columns; along the other axis it is the column count, labelled by the
    # index.
    if axis in (0, "index"):
        expected = Series(n_rows, index=frame.columns, name="size")
    else:
        expected = Series(n_cols, index=frame.index, name="size")
    method = getattr(frame, how)
    result = method("size", axis=axis)
    tm.assert_series_equal(result, expected)


def test_agg_listlike_result():
Expand Down

0 comments on commit 212323f

Please sign in to comment.