From b162331554d7c7f6fd46ddde1ff3908f2dc8bcce Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Sat, 25 May 2024 15:41:01 -0500 Subject: [PATCH] BUG: Let `melt` name multiple variable columns for labels from a `MultiIndex` (#58088) * Let melt name variable columns for a multiindex * Respond to comments * Check is_iterator --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/reshape/melt.py | 21 +++++++++++++++++---- pandas/tests/reshape/test_melt.py | 20 ++++++++++++++++++++ 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a15da861cfbec..6a6abcf2d48fe 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -440,6 +440,7 @@ Missing MultiIndex ^^^^^^^^^^ - :func:`DataFrame.loc` with ``axis=0`` and :class:`MultiIndex` when setting a value adds extra columns (:issue:`58116`) +- :meth:`DataFrame.melt` would not accept multiple names in ``var_name`` when the columns were a :class:`MultiIndex` (:issue:`58033`) - I/O diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index b4720306094e9..294de2cf2fe1d 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -5,7 +5,10 @@ import numpy as np -from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.common import ( + is_iterator, + is_list_like, +) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.missing import notna @@ -64,9 +67,10 @@ def melt( value_vars : scalar, tuple, list, or ndarray, optional Column(s) to unpivot. If not specified, uses all columns that are not set as `id_vars`. - var_name : scalar, default None + var_name : scalar, tuple, list, or ndarray, optional Name to use for the 'variable' column. If None it uses - ``frame.columns.name`` or 'variable'. + ``frame.columns.name`` or 'variable'. Must be a scalar if columns are not a + MultiIndex. 
value_name : scalar, default 'value' Name to use for the 'value' column, can't be an existing column label. col_level : scalar, optional @@ -217,7 +221,16 @@ def melt( frame.columns.name if frame.columns.name is not None else "variable" ] elif is_list_like(var_name): - raise ValueError(f"{var_name=} must be a scalar.") + if isinstance(frame.columns, MultiIndex): + if is_iterator(var_name): + var_name = list(var_name) + if len(var_name) > len(frame.columns): + raise ValueError( + f"{var_name=} has {len(var_name)} items, " + f"but the dataframe columns only have {len(frame.columns)} levels." + ) + else: + raise ValueError(f"{var_name=} must be a scalar.") else: var_name = [var_name] diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index f224a45ca3279..49200face66c5 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -533,6 +533,26 @@ def test_melt_non_scalar_var_name_raises(self): with pytest.raises(ValueError, match=r".* must be a scalar."): df.melt(id_vars=["a"], var_name=[1, 2]) + def test_melt_multiindex_columns_var_name(self): + # GH 58033 + df = DataFrame({("A", "a"): [1], ("A", "b"): [2]}) + + expected = DataFrame( + [("A", "a", 1), ("A", "b", 2)], columns=["first", "second", "value"] + ) + + tm.assert_frame_equal(df.melt(var_name=["first", "second"]), expected) + tm.assert_frame_equal(df.melt(var_name=["first"]), expected[["first", "value"]]) + + def test_melt_multiindex_columns_var_name_too_many(self): + # GH 58033 + df = DataFrame({("A", "a"): [1], ("A", "b"): [2]}) + + with pytest.raises( + ValueError, match="but the dataframe columns only have 2 levels" + ): + df.melt(var_name=["first", "second", "third"]) + class TestLreshape: def test_pairs(self):