diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 123dfa07f4331..bc234bbd31662 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -20,6 +20,7 @@ Fixed regressions - Fixed regression in ``groupby(..).rolling(..).apply()`` (``RollingGroupby``) where the ``raw`` parameter was ignored (:issue:`31754`) - Fixed regression in :meth:`rolling(..).corr() ` when using a time offset (:issue:`31789`) - Fixed regression in :meth:`groupby(..).nunique() ` which was modifying the original values if ``NaN`` values were present (:issue:`31950`) +- Fixed regression in ``DataFrame.groupby`` raising a ``ValueError`` from an internal operation (:issue:`31802`) - Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - Fixed regression in :meth:`groupby(..).agg() ` calling a user-provided function an extra time on an empty input (:issue:`31760`) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index b27072aa66708..29a5a73ef08d0 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -177,6 +177,8 @@ cdef class _BaseGrouper: object.__setattr__(cached_ityp, '_index_data', islider.buf) cached_ityp._engine.clear_mapping() object.__setattr__(cached_typ._data._block, 'values', vslider.buf) + object.__setattr__(cached_typ._data._block, 'mgr_locs', + slice(len(vslider.buf))) object.__setattr__(cached_typ, '_index', cached_ityp) object.__setattr__(cached_typ, 'name', self.name) diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index ff74d374e5e3f..152086c241a52 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -5,6 +5,7 @@ from pandas.core.dtypes.common import ensure_int64 +import pandas as pd from pandas import Index, Series, isna import pandas._testing as tm @@ -51,6 +52,30 @@ def test_series_bin_grouper(): tm.assert_almost_equal(counts, exp_counts) +def assert_block_lengths(x): + assert len(x) == len(x._data.blocks[0].mgr_locs) + return 0 + + +def cumsum_max(x): + x.cumsum().max() + return 0 + + +@pytest.mark.parametrize("func", [cumsum_max, assert_block_lengths]) +def test_mgr_locs_updated(func): + # https://github.com/pandas-dev/pandas/issues/31802 + # Some operations may require creating new blocks, which requires + # valid mgr_locs + df = pd.DataFrame({"A": ["a", "a", "a"], "B": ["a", "b", "b"], "C": [1, 1, 1]}) + result = df.groupby(["A", "B"]).agg(func) + expected = pd.DataFrame( + {"C": [0, 0]}, + index=pd.MultiIndex.from_product([["a"], ["a", "b"]], names=["A", "B"]), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( "binner,closed,expected", [