Skip to content

Commit

Permalink
Disable getitem for GroupBy (#4330)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Aug 9, 2022
1 parent 43bee70 commit d74f306
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 27 deletions.
1 change: 0 additions & 1 deletion py-polars/docs/source/reference/dataframe.rst
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,6 @@ This namespace comes available by calling `DataFrame.groupby(..)`.
GroupBy.apply
GroupBy.count
GroupBy.first
GroupBy.groups
GroupBy.head
GroupBy.last
GroupBy.max
Expand Down
38 changes: 16 additions & 22 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6049,11 +6049,12 @@ def __init__(
self.by = by
self.maintain_order = maintain_order

def __getitem__(self, item: Any) -> GBSelection[DF]:
    """
    Select one or more columns for aggregation by indexing into the GroupBy.

    .. deprecated::
        Indexing into a ``GroupBy`` is deprecated; use the ``.agg`` method
        instead.

    Parameters
    ----------
    item
        Column name(s) to select for aggregation.
    """
    import warnings

    # Emit a real DeprecationWarning instead of printing to stdout, so that
    # callers (and ``pytest.deprecated_call`` in the test suite) can detect,
    # filter, or escalate the warning through the standard warnings machinery.
    warnings.warn(
        "accessing GroupBy by index is deprecated, consider using the `.agg` method",
        DeprecationWarning,
        stacklevel=2,
    )
    return self._select(item)
def __iter__(self) -> Iterable[Any]:
    """Yield each group as its own DataFrame, one group at a time."""
    groups_df = self._groups()
    frame = self._dataframe_class._from_pydf(self._df)
    # Each entry of the "groups" column holds the row indices of one group;
    # indexing the full frame with those indices materializes the sub-frame.
    for row_indices in groups_df["groups"]:
        yield frame[row_indices]

def _select(self, columns: str | list[str]) -> GBSelection[DF]: # pragma: no cover
"""
Expand All @@ -6079,14 +6080,16 @@ def _select(self, columns: str | list[str]) -> GBSelection[DF]: # pragma: no co
dataframe_class=self._dataframe_class,
)

def __iter__(self) -> Iterable[Any]:
    """Yield each group as its own DataFrame, one group at a time."""
    groups_df = self.groups()
    frame = self._dataframe_class._from_pydf(self._df)
    # Each entry of the "groups" column holds the row indices of one group;
    # indexing the full frame with those indices materializes the sub-frame.
    for row_indices in groups_df["groups"]:
        yield frame[row_indices]
def _select_all(self) -> GBSelection[DF]:
    """Select all columns for aggregation."""
    # A column selection of ``None`` means "every column".
    selection = GBSelection(
        self._df, self.by, None, dataframe_class=self._dataframe_class
    )
    return selection

def groups(self) -> DF: # pragma: no cover
def _groups(self) -> DF: # pragma: no cover
"""
Return a `DataFrame` with:
Expand All @@ -6104,7 +6107,7 @@ def groups(self) -> DF: # pragma: no cover
... }
... )
>>> df.groupby("d").groups().sort(by="d")
>>> df.groupby("d")._groups().sort(by="d")
shape: (3, 2)
┌────────┬───────────┐
│ d ┆ groups │
Expand Down Expand Up @@ -6382,15 +6385,6 @@ def tail(self, n: int = 5) -> DF:
)
return self._dataframe_class._from_pydf(df._df)

def _select_all(self) -> GBSelection[DF]:
    """Select all columns for aggregation."""
    # Passing ``None`` as the column selection means "every column".
    selection = GBSelection(
        self._df, self.by, None, dataframe_class=self._dataframe_class
    )
    return selection

def pivot(
self, pivot_column: str | list[str], values_column: str | list[str]
) -> PivotOps[DF]:
Expand Down
4 changes: 0 additions & 4 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,10 +343,6 @@ def test_groupby() -> None:

assert df.groupby("a").apply(lambda df: df[["c"]].sum()).sort("c")["c"][0] == 1

with pytest.deprecated_call():
df_groups = df.groupby("a").groups().sort("a")
assert df_groups["a"].series_equal(pl.Series("a", ["a", "b", "c"]))

with pytest.deprecated_call():
# TODO: find a way to avoid indexing into GroupBy
for subdf in df.groupby("a"): # type: ignore[attr-defined]
Expand Down

0 comments on commit d74f306

Please sign in to comment.