diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 007f5b7feb060..28e6f1c2c3573 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1418,6 +1418,7 @@ Reshaping
 - Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue:`22796`)
 - Bug in :func:`merge_asof` where confusing error message raised when attempting to merge with missing values (:issue:`23189`)
 - Bug in :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have a :class:`MultiIndex` for columns (:issue:`23033`).
+- Bug in :func:`pandas.melt` when passing column names that are not present in ``DataFrame`` (:issue:`23575`)
 - Bug in :meth:`DataFrame.append` with a :class:`Series` with a dateutil timezone would raise a ``TypeError`` (:issue:`23682`)
 
 .. _whatsnew_0240.bug_fixes.sparse:
diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py
index 6596e055db1a8..2dd6dc71b9d98 100644
--- a/pandas/core/reshape/melt.py
+++ b/pandas/core/reshape/melt.py
@@ -13,6 +13,8 @@
 from pandas import compat
 from pandas.core.arrays import Categorical
+import pandas.core.common as com
 from pandas.core.frame import _shared_docs
+from pandas.core.indexes.base import Index
 from pandas.core.reshape.concat import concat
 from pandas.core.tools.numeric import to_numeric
 
@@ -24,6 +26,12 @@
 def melt(frame, id_vars=None, value_vars=None, var_name=None,
          value_name='value', col_level=None):
     # TODO: what about the existing index?
+    # If multiindex, gather names of columns on all levels for checking
+    # presence of `id_vars` and `value_vars`
+    if isinstance(frame.columns, ABCMultiIndex):
+        cols = [x for c in frame.columns for x in c]
+    else:
+        cols = list(frame.columns)
     if id_vars is not None:
         if not is_list_like(id_vars):
             id_vars = [id_vars]
@@ -32,7 +40,15 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
             raise ValueError('id_vars must be a list of tuples when columns'
                              ' are a MultiIndex')
         else:
             id_vars = list(id_vars)
+        # Check that `id_vars` are in frame. Done after the normalisation
+        # above so a scalar `id_vars` is validated too; `com.flatten` keeps
+        # each label's type (`np.ravel` would coerce mixed labels to str).
+        missing = Index(list(com.flatten(id_vars))).difference(cols)
+        if not missing.empty:
+            raise KeyError("The following 'id_vars' are not present"
+                           " in the DataFrame: {missing}"
+                           "".format(missing=list(missing)))
     else:
         id_vars = []
 
@@ -45,6 +61,13 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
                              ' columns are a MultiIndex')
         else:
             value_vars = list(value_vars)
+        # Check that `value_vars` are in frame (same approach as `id_vars`:
+        # scalars included, label types preserved by `com.flatten`).
+        missing = Index(list(com.flatten(value_vars))).difference(cols)
+        if not missing.empty:
+            raise KeyError("The following 'value_vars' are not present in"
+                           " the DataFrame: {missing}"
+                           "".format(missing=list(missing)))
         frame = frame.loc[:, id_vars + value_vars]
     else:
         frame = frame.copy()
diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py
index 16ecb07c5f413..8fd3ae8bb387b 100644
--- a/pandas/tests/reshape/test_melt.py
+++ b/pandas/tests/reshape/test_melt.py
@@ -101,6 +101,14 @@ def test_vars_work_with_multiindex(self):
         result = self.df1.melt(id_vars=[('A', 'a')], value_vars=[('B', 'b')])
         tm.assert_frame_equal(result, expected)
 
+    def test_single_vars_work_with_multiindex(self):
+        expected = DataFrame({
+            'A': {0: 1.067683, 1: -1.321405, 2: -0.807333},
+            'CAP': {0: 'B', 1: 'B', 2: 'B'},
+            'value': {0: -1.110463, 1: 0.368915, 2: 0.08298}})
+        result = self.df1.melt(['A'], ['B'], col_level=0)
+        tm.assert_frame_equal(result, expected)
+
     def test_tuple_vars_fail_with_multiindex(self):
         # melt should fail with an informative error message if
         # the columns have a MultiIndex and a tuple is passed
@@ -233,6 +241,69 @@ def test_pandas_dtypes(self, col):
         expected.columns = ['klass', 'col', 'attribute', 'value']
         tm.assert_frame_equal(result, expected)
 
+    def test_melt_missing_columns_raises(self):
+        # GH-23575
+        # This test is to ensure that pandas raises an error if melting is
+        # attempted with column names absent from the dataframe
+
+        # Generate data
+        df = pd.DataFrame(np.random.randn(5, 4), columns=list('abcd'))
+
+        # Try to melt with missing `value_vars` column name
+        msg = "The following '{Var}' are not present in the DataFrame: {Col}"
+        with pytest.raises(
+                KeyError,
+                match=msg.format(Var='value_vars', Col="\\['C'\\]")):
+            df.melt(['a', 'b'], ['C', 'd'])
+
+        # Try to melt with missing `id_vars` column name
+        with pytest.raises(
+                KeyError,
+                match=msg.format(Var='id_vars', Col="\\['A'\\]")):
+            df.melt(['A', 'b'], ['c', 'd'])
+
+        # Multiple missing
+        with pytest.raises(
+                KeyError,
+                match=msg.format(Var='id_vars',
+                                 Col="\\['not_here', 'or_there'\\]")):
+            df.melt(['a', 'b', 'not_here', 'or_there'], ['c', 'd'])
+
+        # Multiindex melt fails if column is missing from multilevel melt
+        multi = df.copy()
+        multi.columns = [list('ABCD'), list('abcd')]
+        with pytest.raises(
+                KeyError,
+                match=msg.format(Var='id_vars',
+                                 Col="\\['E'\\]")):
+            multi.melt([('E', 'a')], [('B', 'b')])
+        # Multiindex fails if column is missing from single level melt
+        with pytest.raises(
+                KeyError,
+                match=msg.format(Var='value_vars',
+                                 Col="\\['F'\\]")):
+            multi.melt(['A'], ['F'], col_level=0)
+
+        # Scalar (non list-like) arguments are validated as well
+        with pytest.raises(
+                KeyError,
+                match=msg.format(Var='id_vars', Col="\\['A'\\]")):
+            df.melt('A', ['c', 'd'])
+        with pytest.raises(
+                KeyError,
+                match=msg.format(Var='value_vars', Col="\\['C'\\]")):
+            df.melt(['a', 'b'], 'C')
+
+    def test_melt_mixed_int_str_vars(self):
+        # Integer and string labels passed together must each be matched
+        # as-is instead of being coerced to a common type first
+        df = DataFrame([['foo', 'bar', 1, 2]], columns=[0, 'a', 'b', 'd'])
+        result = df.melt(id_vars=[0, 'a'], value_vars=['b', 'd'])
+        expected = DataFrame([['foo', 'bar', 'b', 1],
+                              ['foo', 'bar', 'd', 2]],
+                             columns=[0, 'a', 'variable', 'value'])
+        tm.assert_frame_equal(result, expected)
+
 
 class TestLreshape(object):
 