BUG: groupby with resample using on parameter errors when selecting column to apply function

Closes pandas-dev#17813. Author: discort <odiscort@gmail.com>. Closes pandas-dev#19433 from discort/fix_17813 and squashes the following commits: 2f25d40 [discort] Fixed bug in df.resample using 'on' parameter
toobaz · Feb 5, 2018 · 5b58a20 · 5b58a20
1 parent 98f3937
commit 5b58a20
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 5 deletions.
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -289,13 +289,17 @@ Convert to an xarray DataArray
    p.to_xarray()
 
 
+.. _whatsnew_0230.api_breaking.build_changes:
+
 Build Changes
 ^^^^^^^^^^^^^
 
 - Building pandas for development now requires ``cython >= 0.24`` (:issue:`18613`)
 - Building from source now explicitly requires ``setuptools`` in ``setup.py`` (:issue:`18113`)
 - Updated conda recipe to be in compliance with conda-build 3.0+ (:issue:`18002`)
 
+.. _whatsnew_0230.api_breaking.extract:
+
 Extraction of matching patterns from strings
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -594,8 +598,8 @@ Groupby/Resample/Rolling
 - Fixed regression in :func:`DataFrame.groupby` which would not emit an error when called with a tuple key not in the index (:issue:`18798`)
 - Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`)
 - Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
-- Bug in ``transform`` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`)
--
+- Bug in :func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`)
+- Bug in :func:`DataFrame.groupby` followed by ``.resample()`` with the ``on=`` kwarg, where subsequently selecting a column and applying a function (e.g. ``.apply()``) raised an error (:issue:`17813`)
 
 Sparse
 ^^^^^^

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -37,6 +37,7 @@
     _ensure_categorical,
     _ensure_float)
 from pandas.core.dtypes.cast import maybe_downcast_to_dtype
+from pandas.core.dtypes.generic import ABCSeries
 from pandas.core.dtypes.missing import isna, notna, _maybe_fill
 
 from pandas.core.base import (PandasObject, SelectionMixin, GroupByError,
@@ -423,6 +424,7 @@ def __init__(self, key=None, level=None, freq=None, axis=0, sort=False):
         self.obj = None
         self.indexer = None
         self.binner = None
+        self._grouper = None
 
     @property
     def ax(self):
@@ -465,12 +467,22 @@ def _set_grouper(self, obj, sort=False):
             raise ValueError(
                 "The Grouper cannot specify both a key and a level!")
 
+        # Keep self.grouper value before overriding
+        if self._grouper is None:
+            self._grouper = self.grouper
+
         # the key must be a valid info item
         if self.key is not None:
             key = self.key
-            if key not in obj._info_axis:
-                raise KeyError("The grouper name {0} is not found".format(key))
-            ax = Index(obj[key], name=key)
+            # The 'on' is already defined
+            if getattr(self.grouper, 'name', None) == key and \
+                    isinstance(obj, ABCSeries):
+                ax = self._grouper.take(obj.index)
+            else:
+                if key not in obj._info_axis:
+                    raise KeyError(
+                        "The grouper name {0} is not found".format(key))
+                ax = Index(obj[key], name=key)
 
         else:
             ax = obj._get_axis(self.axis)

diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py
@@ -3077,6 +3077,15 @@ def test_getitem_multiple(self):
         result = r['buyer'].count()
         assert_series_equal(result, expected)
 
+    def test_groupby_resample_on_api_with_getitem(self):
+        # GH 17813: groupby().resample(on=...) then column selection
+        df = pd.DataFrame({'id': list('aabbb'),
+                           'date': pd.date_range('1-1-2016', periods=5),
+                           'data': 1})
+        exp = df.set_index('date').groupby('id').resample('2D')['data'].sum()
+        result = df.groupby('id').resample('2D', on='date')['data'].sum()
+        assert_series_equal(result, exp)
+
     def test_nearest(self):
 
         # GH 17496