Skip to content
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ Bug fixes
Categorical
^^^^^^^^^^^
- Bug in :meth:`Categorical.set_categories` losing dtype information (:issue:`48812`)
-
- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` reordering the categories of a categorical used as a grouper (:issue:`48749`)

Datetimelike
^^^^^^^^^^^^
Expand Down
36 changes: 1 addition & 35 deletions pandas/core/groupby/categorical.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np

from pandas.core.algorithms import unique1d
Expand All @@ -11,9 +9,6 @@
recode_for_categories,
)

if TYPE_CHECKING:
from pandas.core.indexes.api import CategoricalIndex


def recode_for_groupby(
c: Categorical, sort: bool, observed: bool
Expand Down Expand Up @@ -77,7 +72,7 @@ def recode_for_groupby(
# sort=False should order groups in as-encountered order (GH-8868)

# xref GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
all_codes = np.arange(c.categories.nunique(), dtype=np.int8)
all_codes = np.arange(c.categories.nunique())
# GH 38140: exclude nan from indexer for categories
unique_notnan_codes = unique1d(c.codes[c.codes != -1])
if c.ordered:
Expand All @@ -90,32 +85,3 @@ def recode_for_groupby(
take_codes = unique_notnan_codes

return Categorical(c, c.unique().categories.take(take_codes)), None


def recode_from_groupby(
    c: Categorical, sort: bool, ci: CategoricalIndex
) -> CategoricalIndex:
    """
    Bring the categories of a groupby result index back in line with ``c``.

    Parameters
    ----------
    c : Categorical
        Categorical whose ``categories`` supply the full set of categories.
    sort : bool
        The value of the sort parameter groupby was called with.
    ci : CategoricalIndex
        The index whose categories are recoded.

    Returns
    -------
    CategoricalIndex
        With ``sort=True``, ``ci`` with its categories set to ``c.categories``.
        With ``sort=False``, ``ci`` with the categories of ``c`` that it lacks
        appended after its existing categories.
    """
    full_categories = c.categories

    if not sort:
        # Not sorting: keep the category order ci already has and append
        # whichever categories of c are absent from it.
        already_present = full_categories.isin(ci.categories)
        missing = full_categories[~already_present]
        # mypy error: "CategoricalIndex" has no attribute "add_categories"
        return ci.add_categories(missing)  # type: ignore[attr-defined]

    # Sorting: restore the original categories in their original order.
    # mypy error: "CategoricalIndex" has no attribute "set_categories"
    return ci.set_categories(full_categories)  # type: ignore[attr-defined]
14 changes: 9 additions & 5 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,7 @@
import pandas.core.common as com
from pandas.core.frame import DataFrame
from pandas.core.groupby import ops
from pandas.core.groupby.categorical import (
recode_for_groupby,
recode_from_groupby,
)
from pandas.core.groupby.categorical import recode_for_groupby
from pandas.core.indexes.api import (
CategoricalIndex,
Index,
Expand Down Expand Up @@ -462,6 +459,7 @@ class Grouping:
_group_index: Index | None = None
_passed_categorical: bool
_all_grouper: Categorical | None
_orig_cats: Index | None
_index: Index

def __init__(
Expand All @@ -479,6 +477,7 @@ def __init__(
self._orig_grouper = grouper
self.grouping_vector = _convert_grouper(index, grouper)
self._all_grouper = None
self._orig_cats = None
self._index = index
self._sort = sort
self.obj = obj
Expand Down Expand Up @@ -529,6 +528,7 @@ def __init__(
# a passed Categorical
self._passed_categorical = True

self._orig_cats = self.grouping_vector.categories
self.grouping_vector, self._all_grouper = recode_for_groupby(
self.grouping_vector, sort, observed
)
Expand Down Expand Up @@ -646,7 +646,9 @@ def result_index(self) -> Index:
if self._all_grouper is not None:
group_idx = self.group_index
assert isinstance(group_idx, CategoricalIndex)
return recode_from_groupby(self._all_grouper, self._sort, group_idx)
categories = self._all_grouper.categories
# set_categories is dynamically added
return group_idx.set_categories(categories) # type: ignore[attr-defined]
return self.group_index

@cache_readonly
Expand Down Expand Up @@ -678,6 +680,8 @@ def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
uniques = Categorical.from_codes(
codes=ucodes, categories=categories, ordered=cat.ordered
)
if not self._observed:
uniques = uniques.reorder_categories(self._orig_cats)
return cat.codes, uniques

elif isinstance(self.grouping_vector, ops.BaseGrouper):
Expand Down
Loading