Skip to content

Commit

Permalink
BUG: Let melt name multiple variable columns for labels from a `Mul…
Browse files Browse the repository at this point in the history
…tiIndex` (#58088)

* Let melt name variable columns for a multiindex

* Respond to comments

* Check is_iterator
  • Loading branch information
rob-sil committed May 25, 2024
1 parent 3b48b17 commit b162331
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ Missing
MultiIndex
^^^^^^^^^^
- :func:`DataFrame.loc` with ``axis=0`` and :class:`MultiIndex` when setting a value adds extra columns (:issue:`58116`)
- :meth:`DataFrame.melt` would not accept multiple names in ``var_name`` when the columns were a :class:`MultiIndex` (:issue:`58033`)
-

I/O
Expand Down
21 changes: 17 additions & 4 deletions pandas/core/reshape/melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@

import numpy as np

from pandas.core.dtypes.common import is_list_like
from pandas.core.dtypes.common import (
is_iterator,
is_list_like,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.missing import notna

Expand Down Expand Up @@ -64,9 +67,10 @@ def melt(
value_vars : scalar, tuple, list, or ndarray, optional
Column(s) to unpivot. If not specified, uses all columns that
are not set as `id_vars`.
var_name : scalar, default None
var_name : scalar, tuple, list, or ndarray, optional
Name to use for the 'variable' column. If None it uses
``frame.columns.name`` or 'variable'.
``frame.columns.name`` or 'variable'. Must be a scalar if columns are not a
MultiIndex.
value_name : scalar, default 'value'
Name to use for the 'value' column, can't be an existing column label.
col_level : scalar, optional
Expand Down Expand Up @@ -217,7 +221,16 @@ def melt(
frame.columns.name if frame.columns.name is not None else "variable"
]
elif is_list_like(var_name):
raise ValueError(f"{var_name=} must be a scalar.")
if isinstance(frame.columns, MultiIndex):
if is_iterator(var_name):
var_name = list(var_name)
if len(var_name) > len(frame.columns):
raise ValueError(
f"{var_name=} has {len(var_name)} items, "
f"but the dataframe columns only have {len(frame.columns)} levels."
)
else:
raise ValueError(f"{var_name=} must be a scalar.")
else:
var_name = [var_name]

Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/reshape/test_melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,26 @@ def test_melt_non_scalar_var_name_raises(self):
with pytest.raises(ValueError, match=r".* must be a scalar."):
df.melt(id_vars=["a"], var_name=[1, 2])

def test_melt_multiindex_columns_var_name(self):
# GH 58033
df = DataFrame({("A", "a"): [1], ("A", "b"): [2]})

expected = DataFrame(
[("A", "a", 1), ("A", "b", 2)], columns=["first", "second", "value"]
)

tm.assert_frame_equal(df.melt(var_name=["first", "second"]), expected)
tm.assert_frame_equal(df.melt(var_name=["first"]), expected[["first", "value"]])

def test_melt_multiindex_columns_var_name_too_many(self):
    """Check that ``melt`` rejects three ``var_name`` entries on this frame."""
    # GH 58033
    frame = DataFrame({("A", "a"): [1], ("A", "b"): [2]})
    too_many_names = ["first", "second", "third"]
    expected_msg = "but the dataframe columns only have 2 levels"

    with pytest.raises(ValueError, match=expected_msg):
        frame.melt(var_name=too_many_names)


class TestLreshape:
def test_pairs(self):
Expand Down

0 comments on commit b162331

Please sign in to comment.