Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added fastparquet
Binary file not shown.
68 changes: 68 additions & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2707,6 +2707,74 @@ def reorder_levels(self, order) -> MultiIndex:
result = self._reorder_ilevels(order)
return result

def insert_level(self, position: int, value, name=None) -> MultiIndex:
    """
    Insert a new level at the specified position and return a new MultiIndex.

    Parameters
    ----------
    position : int
        The integer position where the new level should be inserted.
        Must be between 0 and ``self.nlevels`` (inclusive).
    value : scalar or sequence
        Values for the inserted level. If a scalar is provided (strings
        and bytes count as scalars), it is broadcast to the length of the
        index. If a sequence is provided, it must be the same length as
        the index.
    name : Hashable, default None
        Name of the inserted level. If not provided, the inserted level
        name will be ``None``.

    Returns
    -------
    MultiIndex
        A new ``MultiIndex`` with the inserted level. The original index
        is left unchanged.

    Raises
    ------
    TypeError
        If ``position`` is not an integer.
    ValueError
        If ``position`` is outside ``[0, self.nlevels]``, or if a
        list-like ``value`` does not have the same length as the index.

    Examples
    --------
    >>> idx = pd.MultiIndex.from_tuples([("A", 1), ("B", 2)], names=["x", "y"])
    >>> idx.insert_level(0, "grp")
    MultiIndex([('grp', 'A', 1), ('grp', 'B', 2)],
               names=[None, 'x', 'y'])
    >>> idx.insert_level(1, ["L1", "L2"], name="z")
    MultiIndex([('A', 'L1', 1), ('B', 'L2', 2)],
               names=['x', 'z', 'y'])
    """
    # Imported locally so this method does not rely on which helpers the
    # enclosing module happens to import at the top of the file.
    from pandas.core.dtypes.common import (
        is_integer,
        is_list_like,
    )

    # ``is_integer`` accepts NumPy integer scalars and rejects booleans,
    # unlike a bare ``isinstance(position, int)`` check.
    if not is_integer(position):
        raise TypeError("position must be an integer")

    if position < 0 or position > self.nlevels:
        raise ValueError(f"position must be between 0 and {self.nlevels}")

    # ``is_list_like`` treats str and bytes as scalars, so both are
    # broadcast instead of being split into characters / byte values.
    if is_list_like(value):
        value = list(value)
        if len(value) != len(self):
            raise ValueError("Length of values must match length of index")
    else:
        value = [value] * len(self)

    # Rebuild from per-level arrays rather than per-row tuples: this keeps
    # each existing level's dtype (e.g. categorical) and avoids a Python
    # loop over every row. ``_get_level_values`` is strictly positional,
    # so integer level *names* cannot be confused with positions.
    arrays = [self._get_level_values(i) for i in range(self.nlevels)]
    arrays.insert(position, value)

    new_names = list(self.names)
    new_names.insert(position, name)

    return MultiIndex.from_arrays(arrays, names=new_names)

def _reorder_ilevels(self, order) -> MultiIndex:
if len(order) != self.nlevels:
raise AssertionError(
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/indexes/multi/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,3 +870,14 @@ def test_dtype_representation(using_infer_string):
dtype=object,
)
tm.assert_series_equal(result, expected)


def test_insert_level_integration():
    """A MultiIndex built by ``insert_level`` can be used as a DataFrame index."""
    idx = MultiIndex.from_tuples([("A", 1), ("B", 2)])

    df = pd.DataFrame({"data": [10, 20]}, index=idx)
    new_idx = idx.insert_level(0, "group1")
    df_new = df.set_index(new_idx)

    # Check the resulting index itself, not only its shape, so a wrongly
    # placed or wrongly valued level is detected.
    tm.assert_index_equal(df_new.index, new_idx)
    assert df_new.index.nlevels == 3
    assert len(df_new) == 2
    # Row data must be carried over unchanged and in the same order.
    assert df_new["data"].tolist() == [10, 20]
91 changes: 91 additions & 0 deletions pandas/tests/indexes/multi/test_insert_level.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import pytest

import pandas as pd
import pandas._testing as tm


class TestMultiIndexInsertLevel:
    """Tests for ``MultiIndex.insert_level``."""

    @pytest.mark.parametrize(
        "position, value, name, expected_tuples, expected_names",
        [
            # scalar broadcast at the first position
            (
                0,
                "new_value",
                None,
                [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)],
                [None, "level1", "level2"],
            ),
            # scalar broadcast in the middle
            (
                1,
                "middle",
                None,
                [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)],
                ["level1", None, "level2"],
            ),
            # scalar broadcast at the last valid position (== nlevels)
            (
                2,
                "end",
                None,
                [("A", 1, "end"), ("B", 2, "end"), ("C", 3, "end")],
                ["level1", "level2", None],
            ),
            # one value per row
            (
                1,
                ["X", "Y", "Z"],
                None,
                [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)],
                ["level1", None, "level2"],
            ),
            # explicit level name
            (
                0,
                "new_val",
                "new_level",
                [("new_val", "A", 1), ("new_val", "B", 2), ("new_val", "C", 3)],
                ["new_level", "level1", "level2"],
            ),
            # non-string scalars
            (
                1,
                100,
                None,
                [("A", 100, 1), ("B", 100, 2), ("C", 100, 3)],
                ["level1", None, "level2"],
            ),
            (
                1,
                1.5,
                None,
                [("A", 1.5, 1), ("B", 1.5, 2), ("C", 1.5, 3)],
                ["level1", None, "level2"],
            ),
        ],
    )
    def test_insert_level_basic(
        self, position, value, name, expected_tuples, expected_names
    ):
        idx = pd.MultiIndex.from_tuples(
            [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
        )

        result = idx.insert_level(position, value, name=name)

        expected = pd.MultiIndex.from_tuples(expected_tuples, names=expected_names)
        tm.assert_index_equal(result, expected)

    def test_insert_level_none_value(self):
        # A ``None`` scalar yields a level that is missing for every row.
        idx = pd.MultiIndex.from_tuples(
            [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
        )

        result = idx.insert_level(1, None)

        assert result.nlevels == 3
        assert result.get_level_values(1).isna().all()

    def test_insert_level_empty_index(self):
        idx = pd.MultiIndex.from_tuples([], names=["level1", "level2"])

        result = idx.insert_level(1, "value", name="new_level")

        assert result.nlevels == 3
        assert len(result) == 0
        assert list(result.names) == ["level1", "new_level", "level2"]

    @pytest.mark.parametrize(
        "position, value, msg",
        [
            (5, "invalid", "position must be between"),
            (-1, "invalid", "position must be between"),
            (1, ["too", "few"], "Length of values must match"),
        ],
    )
    def test_insert_level_error_cases(self, position, value, msg):
        idx = pd.MultiIndex.from_tuples(
            [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
        )

        with pytest.raises(ValueError, match=msg):
            idx.insert_level(position, value)

    def test_insert_level_non_integer_position(self):
        idx = pd.MultiIndex.from_tuples(
            [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
        )

        with pytest.raises(TypeError, match="position must be an integer"):
            idx.insert_level(1.5, "invalid")

    def test_insert_level_preserves_original(self):
        idx = pd.MultiIndex.from_tuples(
            [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
        )
        original = idx.copy()

        idx.insert_level(1, "temp")

        # ``insert_level`` must return a new object and leave ``idx`` intact.
        tm.assert_index_equal(idx, original)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assertion feels redundant against the first test.


def test_debug_names():
    """Inserting an unnamed level at position 0 keeps the existing level names."""
    idx = pd.MultiIndex.from_tuples(
        [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
    )

    result = idx.insert_level(0, "new_value")

    expected = pd.MultiIndex.from_tuples(
        [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)],
        names=[None, "level1", "level2"],
    )
    # Assert instead of printing, so a name mismatch actually fails the test.
    assert list(result.names) == list(expected.names)
    tm.assert_index_equal(result, expected)
Comment on lines +78 to +91
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does this exist?

Binary file added pyarrow
Binary file not shown.
Loading