From 7852905d7b659564e02756c3688b222d5d3ad29d Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sat, 29 Jul 2023 22:05:12 +0800 Subject: [PATCH] Fix Series.groupby raising OutOfBoundsDatetime with DatetimeIndex and month name --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/groupby/grouper.py | 7 +++++-- pandas/tests/groupby/test_groupby.py | 9 +++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 0fdec3175f635..aa66a10a163ac 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -628,6 +628,7 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64, timedelta64 or :class:`PeriodDtype` values (:issue:`52128`, :issue:`53045`) - Bug in :meth:`DataFrameGroupby.resample` with ``kind="period"`` raising ``AttributeError`` (:issue:`24103`) - Bug in :meth:`Resampler.ohlc` with empty object returning a :class:`Series` instead of empty :class:`DataFrame` (:issue:`42902`) +- Bug in :meth:`Series.groupby` raising an error when grouped :class:`Series` has a :class:`DatetimeIndex` index and a :class:`Series` with a name that is a month is given to the ``by`` argument (:issue:`48509`) - Bug in :meth:`SeriesGroupBy.count` and :meth:`DataFrameGroupBy.count` where the dtype would be ``np.int64`` for data with :class:`ArrowDtype` or masked dtypes (e.g. 
``Int64``) (:issue:`53831`) - Bug in :meth:`SeriesGroupBy.nth` and :meth:`DataFrameGroupBy.nth` after performing column selection when using ``dropna="any"`` or ``dropna="all"`` would not subset columns (:issue:`53518`) - Bug in :meth:`SeriesGroupBy.nth` and :meth:`DataFrameGroupBy.nth` raised after performing column selection when using ``dropna="any"`` or ``dropna="all"`` resulted in rows being dropped (:issue:`53518`) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 4201887e13178..ea92fbae9566d 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -15,6 +15,7 @@ from pandas._config import using_copy_on_write from pandas._libs import lib +from pandas._libs.tslibs import OutOfBoundsDatetime from pandas.errors import InvalidIndexError from pandas.util._decorators import cache_readonly from pandas.util._exceptions import find_stack_level @@ -969,7 +970,7 @@ def is_in_obj(gpr) -> bool: # series is part of the object try: obj_gpr_column = obj[gpr.name] - except (KeyError, IndexError, InvalidIndexError): + except (KeyError, IndexError, InvalidIndexError, OutOfBoundsDatetime): return False if isinstance(gpr, Series) and isinstance(obj_gpr_column, Series): return gpr._mgr.references_same_values( # type: ignore[union-attr] @@ -978,11 +979,13 @@ def is_in_obj(gpr) -> bool: return False try: return gpr is obj[gpr.name] - except (KeyError, IndexError, InvalidIndexError): + except (KeyError, IndexError, InvalidIndexError, OutOfBoundsDatetime): # IndexError reached in e.g. test_skip_group_keys when we pass # lambda here # InvalidIndexError raised on key-types inappropriate for index, # e.g. 
DatetimeIndex.get_loc(tuple()) + # OutOfBoundsDatetime raised when obj is a Series with a DatetimeIndex + # and gpr.name is a month-name string such as "jan" return False for gpr, level in zip(keys, levels): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index c8de1cd6785b6..5d8f1c7ac6787 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3124,3 +3124,12 @@ def test_groupby_with_Time_Grouper(): df = test_data.groupby(Grouper(key="time2", freq="1T")).count().reset_index() tm.assert_frame_equal(df, expected_output) + + +def test_groupby_series_with_datetimeindex_month_name(): + # GH 48509 + s = Series([0, 1, 0], index=date_range("2022-01-01", periods=3), name="jan") + result = s.groupby(s).count() + expected = Series([2, 1], name="jan") + expected.index.name = "jan" + tm.assert_series_equal(result, expected)