Skip to content

Commit

Permalink
API: MultiIndex.names|codes|levels returns tuples (#57042)
Browse files Browse the repository at this point in the history
* MultiIndex.names|codes|levels returns tuples

* Fix typing

* Add whatsnew note

* Fix stacking

* Fix doctest, test

* Fix other test

* Remove example
  • Loading branch information
mroeschke committed Feb 7, 2024
1 parent 06ec9a4 commit 99e3afe
Show file tree
Hide file tree
Showing 49 changed files with 246 additions and 492 deletions.
9 changes: 0 additions & 9 deletions doc/source/user_guide/groupby.rst
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,6 @@ We could naturally group by either the ``A`` or ``B`` columns, or both:

``df.groupby('A')`` is just syntactic sugar for ``df.groupby(df['A'])``.

If we also have a MultiIndex on columns ``A`` and ``B``, we can group by all
the columns except the one we specify:

.. ipython:: python
df2 = df.set_index(["A", "B"])
grouped = df2.groupby(level=df2.index.names.difference(["B"]))
grouped.sum()
The above GroupBy will split the DataFrame on its index (rows). To split by columns, first do
a transpose:

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor
Other API changes
^^^^^^^^^^^^^^^^^
- 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. Use :py:class:`pathlib.Path` objects instead (:issue:`57091`)
- :attr:`MultiIndex.codes`, :attr:`MultiIndex.levels`, and :attr:`MultiIndex.names` now return a ``tuple`` instead of a ``FrozenList`` (:issue:`53531`)
-

.. ---------------------------------------------------------------------------
Expand Down
11 changes: 7 additions & 4 deletions pandas/_libs/index.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ import numpy as np

from pandas._typing import npt

from pandas import MultiIndex
from pandas import (
Index,
MultiIndex,
)
from pandas.core.arrays import ExtensionArray

multiindex_nulls_shift: int
Expand Down Expand Up @@ -70,13 +73,13 @@ class MaskedUInt8Engine(MaskedIndexEngine): ...
class MaskedBoolEngine(MaskedUInt8Engine): ...

class BaseMultiIndexCodesEngine:
levels: list[np.ndarray]
levels: tuple[np.ndarray]
offsets: np.ndarray # ndarray[uint64_t, ndim=1]

def __init__(
self,
levels: list[np.ndarray], # all entries hashable
labels: list[np.ndarray], # all entries integer-dtyped
levels: tuple[Index, ...], # all entries hashable
labels: tuple[np.ndarray], # all entries integer-dtyped
offsets: np.ndarray, # np.ndarray[np.uint64, ndim=1]
) -> None: ...
def get_indexer(self, target: npt.NDArray[np.object_]) -> npt.NDArray[np.intp]: ...
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -5698,7 +5698,7 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde
idx = cast(MultiIndex, idx)
levels = list(idx.levels) + [lev]
codes = [np.repeat(x, nqs) for x in idx.codes] + [np.tile(lev_codes, len(idx))]
mi = MultiIndex(levels=levels, codes=codes, names=idx.names + [None])
mi = MultiIndex(levels=levels, codes=codes, names=list(idx.names) + [None])
else:
nidx = len(idx)
idx_codes = coerce_indexer_dtype(np.arange(nidx), idx)
Expand Down
31 changes: 16 additions & 15 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@
disallow_ndim_indexing,
is_valid_positional_slice,
)
from pandas.core.indexes.frozen import FrozenList
from pandas.core.missing import clean_reindex_fill_method
from pandas.core.ops import get_op_result_name
from pandas.core.ops.invalid import make_invalid_op
Expand Down Expand Up @@ -1767,8 +1766,8 @@ def _get_default_index_names(

return names

def _get_names(self) -> FrozenList:
return FrozenList((self.name,))
def _get_names(self) -> tuple[Hashable | None, ...]:
return (self.name,)

def _set_names(self, values, *, level=None) -> None:
"""
Expand Down Expand Up @@ -1866,7 +1865,7 @@ def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None:
('python', 2019),
( 'cobra', 2018),
( 'cobra', 2019)],
names=['species', 'year'])
names=('species', 'year'))
When renaming levels with a dict, levels can not be passed.
Expand All @@ -1875,7 +1874,7 @@ def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None:
('python', 2019),
( 'cobra', 2018),
( 'cobra', 2019)],
names=['snake', 'year'])
names=('snake', 'year'))
"""
if level is not None and not isinstance(self, ABCMultiIndex):
raise ValueError("Level must be None for non-MultiIndex")
Expand Down Expand Up @@ -1959,19 +1958,19 @@ def rename(self, name, inplace: bool = False) -> Self | None:
>>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
... [2018, 2019]],
... names=['kind', 'year'])
... names=('kind', 'year'))
>>> idx
MultiIndex([('python', 2018),
('python', 2019),
( 'cobra', 2018),
( 'cobra', 2019)],
names=['kind', 'year'])
names=('kind', 'year'))
>>> idx.rename(['species', 'year'])
MultiIndex([('python', 2018),
('python', 2019),
( 'cobra', 2018),
( 'cobra', 2019)],
names=['species', 'year'])
names=('species', 'year'))
>>> idx.rename('species')
Traceback (most recent call last):
TypeError: Must pass list-like as `names`.
Expand Down Expand Up @@ -2135,22 +2134,22 @@ def droplevel(self, level: IndexLabel = 0):
>>> mi
MultiIndex([(1, 3, 5),
(2, 4, 6)],
names=['x', 'y', 'z'])
names=('x', 'y', 'z'))
>>> mi.droplevel()
MultiIndex([(3, 5),
(4, 6)],
names=['y', 'z'])
names=('y', 'z'))
>>> mi.droplevel(2)
MultiIndex([(1, 3),
(2, 4)],
names=['x', 'y'])
names=('x', 'y'))
>>> mi.droplevel('z')
MultiIndex([(1, 3),
(2, 4)],
names=['x', 'y'])
names=('x', 'y'))
>>> mi.droplevel(['x', 'y'])
Index([5, 6], dtype='int64', name='z')
Expand Down Expand Up @@ -4865,7 +4864,9 @@ def _join_level(
"""
from pandas.core.indexes.multi import MultiIndex

def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]:
def _get_leaf_sorter(
labels: tuple[np.ndarray, ...] | list[np.ndarray]
) -> npt.NDArray[np.intp]:
"""
Returns sorter for the inner most level while preserving the
order of higher levels.
Expand Down Expand Up @@ -6627,7 +6628,7 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
MultiIndex([(1, 'red'),
(2, 'blue'),
(3, 'green')],
names=['number', 'color'])
names=('number', 'color'))
Check whether the strings in the 'color' level of the MultiIndex
are in a list of colors.
Expand Down Expand Up @@ -7608,7 +7609,7 @@ def ensure_index_from_sequences(sequences, names=None) -> Index:
>>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
MultiIndex([('a', 'a'),
('a', 'b')],
names=['L1', 'L2'])
names=('L1', 'L2'))
See Also
--------
Expand Down
120 changes: 0 additions & 120 deletions pandas/core/indexes/frozen.py

This file was deleted.

0 comments on commit 99e3afe

Please sign in to comment.