Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: DataFrame GroupBy indexing with single items DeprecationWarning #30546

Merged
merged 11 commits into from
Jan 3, 2020
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,7 @@ for more details and examples.

.. ipython:: python

tips_summed = tips.groupby(['sex', 'smoker'])['total_bill', 'tip'].sum()
tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum()
tips_summed.head()


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,7 @@ for more details and examples.

.. ipython:: python

tips_summed = tips.groupby(['sex', 'smoker'])['total_bill', 'tip'].sum()
tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum()
tips_summed.head()


Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,7 @@ Deprecations
it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`).
- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`)
- The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`)
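- When selecting columns from a :class:`DataFrameGroupBy` object, passing multiple keys directly inside the brackets (implicitly a tuple of keys, e.g. ``df.groupby("A")["C", "D"]``) is deprecated; pass a list of keys instead, e.g. ``df.groupby("A")[["C", "D"]]``. Selecting a single column with a single key is unaffected. (:issue:`23566`)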
yehoshuadimarsky marked this conversation as resolved.
Show resolved Hide resolved

.. _whatsnew_1000.prior_deprecations:

Expand Down
14 changes: 13 additions & 1 deletion pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
Union,
cast,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -325,7 +326,7 @@ def _aggregate_multiple_funcs(self, arg):
return DataFrame(results, columns=columns)

def _wrap_series_output(
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index,
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index
) -> Union[Series, DataFrame]:
"""
Wraps the output of a SeriesGroupBy operation into the expected result.
Expand Down Expand Up @@ -1574,6 +1575,17 @@ def filter(self, func, dropna=True, *args, **kwargs):

return self._apply_filter(indices, dropna)

def __getitem__(self, key):
# per GH 23566
if isinstance(key, tuple) and len(key) > 1:
warnings.warn(
"Indexing with multiple keys (implicitly converted to a tuple "
"of keys) will be deprecated, use a list instead.",
DeprecationWarning,
yehoshuadimarsky marked this conversation as resolved.
Show resolved Hide resolved
stacklevel=2,
)
return super().__getitem__(key)

def _gotitem(self, key, ndim: int, subset=None):
"""
sub-classes to define
Expand Down
37 changes: 31 additions & 6 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,16 @@ def test_getitem_list_of_columns(self):
)

result = df.groupby("A")[["C", "D"]].mean()
result2 = df.groupby("A")["C", "D"].mean()
result3 = df.groupby("A")[df.columns[2:4]].mean()
result2 = df.groupby("A")[df.columns[2:4]].mean()

expected = df.loc[:, ["A", "C", "D"]].groupby("A").mean()

tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result2, expected)
tm.assert_frame_equal(result3, expected)

# per GH 23566 this should raise a deprecation warning
with tm.assert_produces_warning(DeprecationWarning):
df.groupby("A")["C", "D"].mean()

def test_getitem_numeric_column_names(self):
# GH #13731
Expand All @@ -91,14 +93,37 @@ def test_getitem_numeric_column_names(self):
}
)
result = df.groupby(0)[df.columns[1:3]].mean()
result2 = df.groupby(0)[2, 4].mean()
result3 = df.groupby(0)[[2, 4]].mean()
result2 = df.groupby(0)[[2, 4]].mean()

expected = df.loc[:, [0, 2, 4]].groupby(0).mean()

tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result2, expected)
tm.assert_frame_equal(result3, expected)

# per GH 23566 this should raise a deprecation warning
yehoshuadimarsky marked this conversation as resolved.
Show resolved Hide resolved
with tm.assert_produces_warning(DeprecationWarning):
df.groupby(0)[2, 4].mean()

def test_getitem_single_column(self):
df = DataFrame(
{
"A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
"B": ["one", "one", "two", "three", "two", "two", "one", "three"],
"C": np.random.randn(8),
"D": np.random.randn(8),
"E": np.random.randn(8),
}
)

result = df.groupby("A")["C"].mean()

as_frame = df.loc[:, ["A", "C"]].groupby("A").mean()
as_series = as_frame.iloc[:, 0]
expected = as_series

assert isinstance(result, Series)
yehoshuadimarsky marked this conversation as resolved.
Show resolved Hide resolved
assert not isinstance(result, DataFrame)
tm.assert_series_equal(result, expected)


# grouping
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def test_dispatch_transform(tsframe):

def test_transform_select_columns(df):
f = lambda x: x.mean()
result = df.groupby("A")["C", "D"].transform(f)
result = df.groupby("A")[["C", "D"]].transform(f)

selection = df[["C", "D"]]
expected = selection.groupby(df["A"]).transform(f)
Expand Down