Sort by grouper values prior to calling groupby-apply
Otherwise we run afoul of a reindex operation when we have multiple
repeated index values
mrocklin committed Feb 1, 2018
1 parent d5407c2 commit 5a90cca
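For context, the reindex failure the message alludes to is the kind pandas raises when an axis contains duplicate labels. A minimal pandas-only illustration of that failure mode (not the actual dask code path, which goes through the shuffled groupby-apply shown below):

import pandas as pd

# An index with repeated labels, like the 9, 9, 9 index used in the test below.
s = pd.Series([1, 2, 3], index=[9, 9, 0])

# Reindexing an object whose index contains duplicates raises a ValueError
# such as "cannot reindex from a duplicate axis" (exact wording varies by
# pandas version).
try:
    s.reindex([0, 9])
except ValueError as err:
    print(err)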
Showing 2 changed files with 9 additions and 4 deletions.
dask/dataframe/groupby.py: 1 addition, 0 deletions
@@ -141,6 +141,7 @@ def _groupby_raise_unaligned(df, **kwargs):
 def _groupby_slice_apply(df, grouper, key, func):
     # No need to use raise if unaligned here - this is only called after
     # shuffling, which makes everything aligned already
+    df = df.sort_values(grouper)
     g = df.groupby(grouper)
     if key:
         g = g[key]
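For reference, a standalone pandas-only sketch of what the patched helper now does: sort by the grouper before grouping, so each group's rows are contiguous. The names mirror the diff above, but this is a simplification; the real function runs on already-shuffled dask partitions, and the collapsed remainder is assumed here to simply apply func to the groups.

import pandas as pd

def groupby_slice_apply(df, grouper, key, func):
    # Mirrors the patched helper: sort by the grouper value(s) first, then
    # group, optionally slice a column, and apply.  The final line is an
    # assumption standing in for the collapsed part of the real function.
    df = df.sort_values(grouper)
    g = df.groupby(grouper)
    if key:
        g = g[key]
    return g.apply(func)

# Example with repeated index labels; 'b' is the grouping column.
df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8, 9],
                   'b': [4, 5, 6, 3, 2, 1, 0, 0, 0]},
                  index=[0, 1, 3, 5, 6, 8, 9, 9, 9])
print(groupby_slice_apply(df, 'b', 'a', lambda s: s.sum()))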
dask/dataframe/tests/test_groupby.py: 8 additions, 4 deletions
@@ -120,11 +120,15 @@ def func(df):
     lambda df: [df['a'], df['b']],
     pytest.mark.xfail(reason="not yet supported")(lambda df: [df['a'] > 2, df['b'] > 1])
 ])
-def test_full_groupby_multilevel(grouper):
+@pytest.mark.parametrize('reverse', [True, False])
+def test_full_groupby_multilevel(grouper, reverse):
+    index = [0, 1, 3, 5, 6, 8, 9, 9, 9]
+    if reverse:
+        index = index[::-1]
     df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8, 9],
                        'd': [1, 2, 3, 4, 5, 6, 7, 8, 9],
                        'b': [4, 5, 6, 3, 2, 1, 0, 0, 0]},
-                      index=[0, 1, 3, 5, 6, 8, 9, 9, 9])
+                      index=index)
     ddf = dd.from_pandas(df, npartitions=3)
 
     def func(df):
@@ -133,9 +137,9 @@ def func(df):
 
     # last one causes a DeprecationWarning from pandas.
     # See https://github.com/pandas-dev/pandas/issues/16481
+    meta = {"a": int, "d": int, "b": float}
     assert_eq(df.groupby(grouper(df)).apply(func),
-              ddf.groupby(grouper(ddf)).apply(func, meta={"a": int, "d": int,
-                                                          "b": float}))
+              ddf.groupby(grouper(ddf)).apply(func, meta=meta))
 
 
 def test_groupby_dir():
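Putting the two changes together, a condensed standalone version of the scenario the updated test exercises: repeated index labels spread across partitions, then a dask groupby-apply checked against pandas. The real test's func body is collapsed above; the stand-in below only has to return columns matching the declared meta, and exact apply semantics around group keys can differ between pandas versions.

import pandas as pd
import dask.dataframe as dd
from dask.dataframe.utils import assert_eq

index = [0, 1, 3, 5, 6, 8, 9, 9, 9]        # repeated labels, as in the test
df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8, 9],
                   'd': [1, 2, 3, 4, 5, 6, 7, 8, 9],
                   'b': [4, 5, 6, 3, 2, 1, 0, 0, 0]},
                  index=index)
ddf = dd.from_pandas(df, npartitions=3)

def func(g):
    # Hypothetical stand-in for the test's collapsed func; it keeps 'a' and
    # 'd' as ints and turns 'b' into a float, matching the meta below.
    return g.assign(b=g.b - g.b.mean())

assert_eq(df.groupby('b').apply(func),
          ddf.groupby('b').apply(func, meta={"a": int, "d": int, "b": float}))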
