diff --git a/doc/source/release.rst b/doc/source/release.rst index dce1a25cf434b..f4d61e70e94b3 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -75,13 +75,14 @@ pandas 0.12 - Simplified the API and added a describe method to Categorical - ``melt`` now accepts the optional parameters ``var_name`` and ``value_name`` to specify custom column names of the returned DataFrame (:issue:`3649`), - thanks @hoechenberger + thanks @hoechenberger. If ``var_name`` is not specified and ``dataframe.columns.name`` + is not None, then this will be used as the ``var_name`` (:issue:`4144`). - clipboard functions use pyperclip (no dependencies on Windows, alternative dependencies offered for Linux) (:issue:`3837`). - Plotting functions now raise a ``TypeError`` before trying to plot anything if the associated objects have a dtype of ``object`` (:issue:`1818`, - :issue:`3572`, :issue:`3911`, :issue:`3912`), but they will try to convert object arrays to - numeric arrays if possible so that you can still plot, for example, an + :issue:`3572`, :issue:`3911`, :issue:`3912`), but they will try to convert object + arrays to numeric arrays if possible so that you can still plot, for example, an object array with floats. This happens before any drawing takes place which eliminates any spurious plots from showing up. - Added Faq section on repr display options, to help users customize their setup. 
diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 2cbeb1cf58a8f..e9d5fe124fc74 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -601,7 +601,7 @@ def _stack_multi_columns(frame, level=-1, dropna=True): def melt(frame, id_vars=None, value_vars=None, - var_name='variable', value_name='value'): + var_name=None, value_name='value'): """ "Unpivots" a DataFrame from wide format to long format, optionally leaving id variables set @@ -611,8 +611,8 @@ def melt(frame, id_vars=None, value_vars=None, frame : DataFrame id_vars : tuple, list, or ndarray value_vars : tuple, list, or ndarray - var_name : scalar - value_name : scalar + var_name : scalar, if None uses frame.columns.name or 'variable' + value_name : scalar, default 'value' Examples -------- @@ -634,6 +634,7 @@ def melt(frame, id_vars=None, value_vars=None, a B 1 b B 3 c B 5 + """ # TODO: what about the existing index? if id_vars is not None: @@ -651,6 +652,9 @@ def melt(frame, id_vars=None, value_vars=None, else: frame = frame.copy() + if var_name is None: + var_name = frame.columns.name if frame.columns.name is not None else 'variable' + N, K = frame.shape K -= len(id_vars) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 6e5f6bffd7544..09c63746c8d4b 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -20,105 +20,141 @@ _multiprocess_can_split_ = True -def test_melt(): - df = tm.makeTimeDataFrame()[:10] - df['id1'] = (df['A'] > 0).astype(np.int64) - df['id2'] = (df['B'] > 0).astype(np.int64) - - var_name = 'var' - value_name = 'val' - - # Default column names - result = melt(df) - result1 = melt(df, id_vars=['id1']) - result2 = melt(df, id_vars=['id1', 'id2']) - result3 = melt(df, id_vars=['id1', 'id2'], - value_vars='A') - result4 = melt(df, id_vars=['id1', 'id2'], - value_vars=['A', 'B']) - - expected4 = DataFrame({'id1': df['id1'].tolist() * 2, - 'id2': df['id2'].tolist() * 2, - 'variable': ['A']*10 + ['B']*10, - 'value': 
df['A'].tolist() + df['B'].tolist()}, - columns=['id1', 'id2', 'variable', 'value']) - tm.assert_frame_equal(result4, expected4) - - # Supply custom name for the 'variable' column - result5 = melt(df, var_name=var_name) - result6 = melt(df, id_vars=['id1'], var_name=var_name) - result7 = melt(df, id_vars=['id1', 'id2'], var_name=var_name) - result8 = melt(df, id_vars=['id1', 'id2'], - value_vars='A', var_name=var_name) - result9 = melt(df, id_vars=['id1', 'id2'], - value_vars=['A', 'B'], var_name=var_name) - - expected9 = DataFrame({'id1': df['id1'].tolist() * 2, - 'id2': df['id2'].tolist() * 2, - var_name: ['A']*10 + ['B']*10, - 'value': df['A'].tolist() + df['B'].tolist()}, - columns=['id1', 'id2', var_name, 'value']) - tm.assert_frame_equal(result9, expected9) - - # Supply custom name for the 'value' column - result10 = melt(df, value_name=value_name) - result11 = melt(df, id_vars=['id1'], value_name=value_name) - result12 = melt(df, id_vars=['id1', 'id2'], value_name=value_name) - result13 = melt(df, id_vars=['id1', 'id2'], - value_vars='A', value_name=value_name) - result14 = melt(df, id_vars=['id1', 'id2'], - value_vars=['A', 'B'], value_name=value_name) - - expected14 = DataFrame({'id1': df['id1'].tolist() * 2, - 'id2': df['id2'].tolist() * 2, - 'variable': ['A']*10 + ['B']*10, - value_name: df['A'].tolist() + df['B'].tolist()}, - columns=['id1', 'id2', 'variable', value_name]) - tm.assert_frame_equal(result14, expected14) - - # Supply custom names for the 'variable' and 'value' columns - result15 = melt(df, var_name=var_name, value_name=value_name) - result16 = melt(df, id_vars=['id1'], var_name=var_name, value_name=value_name) - result17 = melt(df, id_vars=['id1', 'id2'], - var_name=var_name, value_name=value_name) - result18 = melt(df, id_vars=['id1', 'id2'], - value_vars='A', var_name=var_name, value_name=value_name) - result19 = melt(df, id_vars=['id1', 'id2'], - value_vars=['A', 'B'], var_name=var_name, value_name=value_name) - - expected19 = 
DataFrame({'id1': df['id1'].tolist() * 2, - 'id2': df['id2'].tolist() * 2, - var_name: ['A']*10 + ['B']*10, - value_name: df['A'].tolist() + df['B'].tolist()}, - columns=['id1', 'id2', var_name, value_name]) - tm.assert_frame_equal(result19, expected19) - -def test_convert_dummies(): - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - - result = convert_dummies(df, ['A', 'B']) - result2 = convert_dummies(df, ['A', 'B'], prefix_sep='.') - - expected = DataFrame({'A_foo': [1, 0, 1, 0, 1, 0, 1, 1], - 'A_bar': [0, 1, 0, 1, 0, 1, 0, 0], - 'B_one': [1, 1, 0, 0, 0, 0, 1, 0], - 'B_two': [0, 0, 1, 0, 1, 1, 0, 0], - 'B_three': [0, 0, 0, 1, 0, 0, 0, 1], - 'C': df['C'].values, - 'D': df['D'].values}, - columns=result.columns, dtype=float) - expected2 = expected.rename(columns=lambda x: x.replace('_', '.')) - - tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(result2, expected2) - - -class Test_lreshape(unittest.TestCase): +class TestMelt(unittest.TestCase): + + def setUp(self): + self.df = tm.makeTimeDataFrame()[:10] + self.df['id1'] = (self.df['A'] > 0).astype(np.int64) + self.df['id2'] = (self.df['B'] > 0).astype(np.int64) + + self.var_name = 'var' + self.value_name = 'val' + + def test_default_col_names(self): + result = melt(self.df) + self.assertEqual(result.columns.tolist(), ['variable', 'value']) + + result1 = melt(self.df, id_vars=['id1']) + self.assertEqual(result1.columns.tolist(), ['id1', 'variable', 'value']) + + result2 = melt(self.df, id_vars=['id1', 'id2']) + self.assertEqual(result2.columns.tolist(), ['id1', 'id2', 'variable', 'value']) + + def test_value_vars(self): + result3 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A') + self.assertEqual(len(result3), 10) + + result4 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B']) + expected4 = DataFrame({'id1': 
self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + 'variable': ['A']*10 + ['B']*10, + 'value': self.df['A'].tolist() + self.df['B'].tolist()}, + columns=['id1', 'id2', 'variable', 'value']) + tm.assert_frame_equal(result4, expected4) + + def test_custom_var_name(self): + result5 = melt(self.df, var_name=self.var_name) + self.assertEqual(result5.columns.tolist(), ['var', 'value']) + + result6 = melt(self.df, id_vars=['id1'], var_name=self.var_name) + self.assertEqual(result6.columns.tolist(), ['id1', 'var', 'value']) + + result7 = melt(self.df, id_vars=['id1', 'id2'], var_name=self.var_name) + self.assertEqual(result7.columns.tolist(), ['id1', 'id2', 'var', 'value']) + + result8 = melt(self.df, id_vars=['id1', 'id2'], + value_vars='A', var_name=self.var_name) + self.assertEqual(result8.columns.tolist(), ['id1', 'id2', 'var', 'value']) + + result9 = melt(self.df, id_vars=['id1', 'id2'], + value_vars=['A', 'B'], var_name=self.var_name) + expected9 = DataFrame({'id1': self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + self.var_name: ['A']*10 + ['B']*10, + 'value': self.df['A'].tolist() + self.df['B'].tolist()}, + columns=['id1', 'id2', self.var_name, 'value']) + tm.assert_frame_equal(result9, expected9) + + def test_custom_value_name(self): + result10 = melt(self.df, value_name=self.value_name) + self.assertEqual(result10.columns.tolist(), ['variable', 'val']) + + result11 = melt(self.df, id_vars=['id1'], value_name=self.value_name) + self.assertEqual(result11.columns.tolist(), ['id1', 'variable', 'val']) + + result12 = melt(self.df, id_vars=['id1', 'id2'], value_name=self.value_name) + self.assertEqual(result12.columns.tolist(), ['id1', 'id2', 'variable', 'val']) + + result13 = melt(self.df, id_vars=['id1', 'id2'], + value_vars='A', value_name=self.value_name) + self.assertEqual(result13.columns.tolist(), ['id1', 'id2', 'variable', 'val']) + + result14 = melt(self.df, id_vars=['id1', 'id2'], + value_vars=['A', 'B'], 
value_name=self.value_name) + expected14 = DataFrame({'id1': self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + 'variable': ['A']*10 + ['B']*10, + self.value_name: self.df['A'].tolist() + self.df['B'].tolist()}, + columns=['id1', 'id2', 'variable', self.value_name]) + tm.assert_frame_equal(result14, expected14) + + def test_custom_var_and_value_name(self): + # Both custom names must survive every id_vars/value_vars call shape. + result15 = melt(self.df, var_name=self.var_name, value_name=self.value_name) + self.assertEqual(result15.columns.tolist(), ['var', 'val']) + + result16 = melt(self.df, id_vars=['id1'], var_name=self.var_name, value_name=self.value_name) + self.assertEqual(result16.columns.tolist(), ['id1', 'var', 'val']) + + result17 = melt(self.df, id_vars=['id1', 'id2'], + var_name=self.var_name, value_name=self.value_name) + self.assertEqual(result17.columns.tolist(), ['id1', 'id2', 'var', 'val']) + + result18 = melt(self.df, id_vars=['id1', 'id2'], + value_vars='A', var_name=self.var_name, value_name=self.value_name) + self.assertEqual(result18.columns.tolist(), ['id1', 'id2', 'var', 'val']) + + result19 = melt(self.df, id_vars=['id1', 'id2'], + value_vars=['A', 'B'], var_name=self.var_name, value_name=self.value_name) + expected19 = DataFrame({'id1': self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + self.var_name: ['A']*10 + ['B']*10, + self.value_name: self.df['A'].tolist() + self.df['B'].tolist()}, + columns=['id1', 'id2', self.var_name, self.value_name]) + tm.assert_frame_equal(result19, expected19) + + def test_var_name_from_columns_name(self): + self.df.columns.name = 'foo' + result20 = melt(self.df) # no var_name passed: columns.name is used (GH 4144) + self.assertEqual(result20.columns.tolist(), ['foo', 'value']) + +class TestConvertDummies(unittest.TestCase): + def test_convert_dummies(self): + df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + result = convert_dummies(df, ['A', 'B']) + 
result2 = convert_dummies(df, ['A', 'B'], prefix_sep='.') + + expected = DataFrame({'A_foo': [1, 0, 1, 0, 1, 0, 1, 1], + 'A_bar': [0, 1, 0, 1, 0, 1, 0, 0], + 'B_one': [1, 1, 0, 0, 0, 0, 1, 0], + 'B_two': [0, 0, 1, 0, 1, 1, 0, 0], + 'B_three': [0, 0, 0, 1, 0, 0, 0, 1], + 'C': df['C'].values, + 'D': df['D'].values}, + columns=result.columns, dtype=float) + expected2 = expected.rename(columns=lambda x: x.replace('_', '.')) + + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected2) + + +class TestLreshape(unittest.TestCase): def test_pairs(self): data = {'birthdt': ['08jan2009', '20dec2008', '30dec2008',