From a3e5b4dd4a2afaf46f98d298f31a01acc558c659 Mon Sep 17 00:00:00 2001 From: Matthew Rocklin Date: Thu, 1 Feb 2018 09:09:17 -0500 Subject: [PATCH] sort grouper if series/dataframe, sort df in key/name --- dask/dataframe/groupby.py | 8 +++++++- docs/source/changelog.rst | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/dask/dataframe/groupby.py b/dask/dataframe/groupby.py index f2a8ec5da5b..00f2d6f9a32 100644 --- a/dask/dataframe/groupby.py +++ b/dask/dataframe/groupby.py @@ -141,7 +141,13 @@ def _groupby_raise_unaligned(df, **kwargs): def _groupby_slice_apply(df, grouper, key, func): # No need to use raise if unaligned here - this is only called after # shuffling, which makes everything aligned already - df = df.sort_values(grouper) + if isinstance(grouper, (pd.DataFrame, pd.Series, pd.Index)): + grouper = grouper.sort_values() + else: + try: + df = df.sort_values(grouper) + except KeyError: # this fails when the grouper includes the index + pass g = df.groupby(grouper) if key: g = g[key] diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 0fbd4945fb3..e108b8a2f4c 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -18,6 +18,7 @@ DataFrame +++++++++ - Support month timedeltas in repartition(freq=...) (:pr:`3110`) `Matthew Rocklin`_ +- Sort grouper values prior to groupby-apply (:pr:`3118`) `Matthew Rocklin`_ Bag +++