diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 448ceffdaa1eb..ccf677ff45407 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -214,6 +214,7 @@ Other enhancements - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`) - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`) - Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`). +- Added :meth:`MultiIndex.insert_level` to insert new levels at specified positions in a MultiIndex (:issue:`62558`) - Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`) - Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments into :meth:`DataFrame.fillna` (:issue:`4514`) - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`) @@ -228,7 +229,7 @@ Other enhancements - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) - Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`) -- + .. --------------------------------------------------------------------------- .. 
def insert_level(self, position: int, value, name=None) -> MultiIndex:
    """
    Insert a new level at the specified position and return a new MultiIndex.

    The existing levels are carried over as arrays rather than rebuilt
    from Python tuples, so their dtypes (for example categorical levels)
    are kept in the result.

    Parameters
    ----------
    position : int
        The integer position where the new level should be inserted.
        Must be between 0 and ``self.nlevels`` (inclusive). NumPy integers
        are accepted; booleans are not.
    value : scalar or list-like
        Values for the inserted level. A scalar (including ``str`` and
        ``bytes``) is broadcast to the length of the index. A list-like
        must have the same length as the index.
    name : Hashable, default None
        Name of the inserted level. If not provided, the inserted level
        name will be ``None``.

    Returns
    -------
    MultiIndex
        A new ``MultiIndex`` with the inserted level. The original index
        is not modified.

    Raises
    ------
    TypeError
        If ``position`` is not an integer.
    ValueError
        If ``position`` is outside ``[0, self.nlevels]`` or if a list-like
        ``value`` does not have the same length as the index.

    Examples
    --------
    >>> idx = pd.MultiIndex.from_tuples([("A", 1), ("B", 2)], names=["x", "y"])
    >>> idx.insert_level(0, "grp")
    MultiIndex([('grp', 'A', 1),
                ('grp', 'B', 2)],
               names=[None, 'x', 'y'])
    >>> idx.insert_level(1, ["L1", "L2"], name="z")
    MultiIndex([('A', 'L1', 1),
                ('B', 'L2', 2)],
               names=['x', 'z', 'y'])
    """
    # Imported at function scope so this block does not depend on which
    # helpers the module-level import section already exposes.
    from pandas.core.dtypes.common import (
        is_integer,
        is_list_like,
    )

    # is_integer accepts Python and NumPy integers but rejects bool, which a
    # plain isinstance(position, int) check would let through.
    if not is_integer(position):
        raise TypeError("position must be an integer")

    nlevels = self.nlevels
    if position < 0 or position > nlevels:
        raise ValueError(f"position must be between 0 and {nlevels}")
    position = int(position)

    size = len(self)
    if is_list_like(value):
        # Objects carrying a dtype (ndarray, Index, Series, extension
        # arrays) are passed through untouched so their dtype survives;
        # anything else (lists, tuples, generators) is materialized so it
        # can be measured.
        if not hasattr(value, "dtype"):
            value = list(value)
        if len(value) != size:
            raise ValueError("Length of values must match length of index")
        new_level = value
    else:
        # Scalars, including str and bytes, are broadcast to every row.
        new_level = [value] * size

    # _get_level_values is strictly positional; the public
    # get_level_values would resolve an integer as a level *name* first.
    arrays = [self._get_level_values(i) for i in range(nlevels)]
    arrays.insert(position, new_level)

    names = list(self.names)
    names.insert(position, name)

    return MultiIndex.from_arrays(arrays, names=names)
def test_insert_level_integration():
    # A frame re-indexed with the widened index keeps its rows and gains
    # exactly one index level.
    base = MultiIndex.from_tuples([("A", 1), ("B", 2)])
    frame = pd.DataFrame({"data": [10, 20]}, index=base)

    widened = base.insert_level(0, "group1")
    reindexed = frame.set_index(widened)

    assert reindexed.index.nlevels == 3
    assert len(reindexed) == 2


class TestMultiIndexInsertLevel:
    # Tests for ``MultiIndex.insert_level``.

    @staticmethod
    def _two_level_index():
        # Three-row, two-level index shared by most tests below.
        return pd.MultiIndex.from_tuples(
            [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
        )

    @pytest.mark.parametrize(
        "position, value, name, expected_tuples, expected_names",
        [
            (
                0,
                "new_value",
                None,
                [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)],
                [None, "level1", "level2"],
            ),
            (
                1,
                "middle",
                None,
                [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)],
                ["level1", None, "level2"],
            ),
            (
                0,
                "new_val",
                "new_level",
                [("new_val", "A", 1), ("new_val", "B", 2), ("new_val", "C", 3)],
                ["new_level", "level1", "level2"],
            ),
            (
                1,
                "middle",
                "custom_name",
                [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)],
                ["level1", "custom_name", "level2"],
            ),
        ],
    )
    def test_insert_level_basic(
        self, position, value, name, expected_tuples, expected_names
    ):
        # A broadcast scalar lands at the requested slot with the given name.
        want = pd.MultiIndex.from_tuples(expected_tuples, names=expected_names)
        got = self._two_level_index().insert_level(position, value, name=name)
        tm.assert_index_equal(got, want)

    @pytest.mark.parametrize(
        "position, value",
        [
            (0, "start"),
            (2, "end"),
        ],
    )
    def test_insert_level_edge_positions(self, position, value):
        # Both the first slot and the one-past-the-last slot are accepted.
        got = self._two_level_index().insert_level(position, value)
        assert got.nlevels == 3

    @pytest.mark.parametrize(
        "position, value, expected_error",
        [
            (5, "invalid", "position must be between"),
            (-1, "invalid", "position must be between"),
            (1, ["too", "few"], "Length of values must match"),
        ],
    )
    def test_insert_level_error_cases(self, position, value, expected_error):
        # Out-of-range positions and wrong-length sequences raise ValueError.
        mi = self._two_level_index()
        with pytest.raises(ValueError, match=expected_error):
            mi.insert_level(position, value)

    @pytest.mark.parametrize(
        "value",
        [100, 1.5, None],
    )
    def test_insert_level_with_different_data_types(self, value):
        # Non-string scalars are broadcast as well.
        got = self._two_level_index().insert_level(1, value)
        assert got.nlevels == 3

    def test_insert_level_preserves_original(self):
        # insert_level returns a new object; the source index is unchanged.
        mi = self._two_level_index()
        snapshot = mi.copy()

        mi.insert_level(1, "temp")

        tm.assert_index_equal(snapshot, mi)

    def test_insert_level_empty_index(self):
        # An empty index paired with an empty sequence still gains a level.
        no_rows = pd.MultiIndex.from_tuples([], names=["level1", "level2"])

        got = no_rows.insert_level(0, [])

        assert got.nlevels == 3
        assert len(got) == 0

    def test_insert_level_with_different_values(self):
        # A per-row sequence is inserted element by element.
        per_row = ["X", "Y", "Z"]
        want = pd.MultiIndex.from_tuples(
            [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)],
            names=["level1", None, "level2"],
        )

        got = self._two_level_index().insert_level(1, per_row)

        tm.assert_index_equal(got, want)