From 2b44e8f9874d1b62ddda5caa026b38f9f8feb416 Mon Sep 17 00:00:00 2001 From: Andy Hayden Date: Sat, 6 Jul 2013 00:15:26 +0100 Subject: [PATCH 1/3] ENH melt uses column name if available --- pandas/core/reshape.py | 10 +++++++--- pandas/tests/test_reshape.py | 5 +++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 2cbeb1cf58a8f..e9d5fe124fc74 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -601,7 +601,7 @@ def _stack_multi_columns(frame, level=-1, dropna=True): def melt(frame, id_vars=None, value_vars=None, - var_name='variable', value_name='value'): + var_name=None, value_name='value'): """ "Unpivots" a DataFrame from wide format to long format, optionally leaving id variables set @@ -611,8 +611,8 @@ def melt(frame, id_vars=None, value_vars=None, frame : DataFrame id_vars : tuple, list, or ndarray value_vars : tuple, list, or ndarray - var_name : scalar - value_name : scalar + var_name : scalar, if None uses frame.columns.name or 'variable' + value_name : scalar, default 'value' Examples -------- @@ -634,6 +634,7 @@ def melt(frame, id_vars=None, value_vars=None, a B 1 b B 3 c B 5 + """ # TODO: what about the existing index? 
if id_vars is not None: @@ -651,6 +652,9 @@ def melt(frame, id_vars=None, value_vars=None, else: frame = frame.copy() + if var_name is None: + var_name = frame.columns.name if frame.columns.name is not None else 'variable' + N, K = frame.shape K -= len(id_vars) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 6e5f6bffd7544..c47b0f05227dd 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -93,6 +93,11 @@ def test_melt(): columns=['id1', 'id2', var_name, value_name]) tm.assert_frame_equal(result19, expected19) + df1 = df.copy() + df1.columns.name = 'foo' + result20 = melt(df1) + assert(result20.columns.tolist() == ['foo', 'value']) + def test_convert_dummies(): df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], From 0e94acaf1eedd33c2cf41147a0bab7ba567041d0 Mon Sep 17 00:00:00 2001 From: Andy Hayden Date: Sat, 6 Jul 2013 01:00:13 +0100 Subject: [PATCH 2/3] CLN test_reshape --- pandas/tests/test_reshape.py | 239 ++++++++++++++++++++--------------- 1 file changed, 135 insertions(+), 104 deletions(-) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index c47b0f05227dd..09c63746c8d4b 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -20,110 +20,141 @@ _multiprocess_can_split_ = True -def test_melt(): - df = tm.makeTimeDataFrame()[:10] - df['id1'] = (df['A'] > 0).astype(np.int64) - df['id2'] = (df['B'] > 0).astype(np.int64) - - var_name = 'var' - value_name = 'val' - - # Default column names - result = melt(df) - result1 = melt(df, id_vars=['id1']) - result2 = melt(df, id_vars=['id1', 'id2']) - result3 = melt(df, id_vars=['id1', 'id2'], - value_vars='A') - result4 = melt(df, id_vars=['id1', 'id2'], - value_vars=['A', 'B']) - - expected4 = DataFrame({'id1': df['id1'].tolist() * 2, - 'id2': df['id2'].tolist() * 2, - 'variable': ['A']*10 + ['B']*10, - 'value': df['A'].tolist() + df['B'].tolist()}, - columns=['id1', 'id2', 'variable', 
'value']) - tm.assert_frame_equal(result4, expected4) - - # Supply custom name for the 'variable' column - result5 = melt(df, var_name=var_name) - result6 = melt(df, id_vars=['id1'], var_name=var_name) - result7 = melt(df, id_vars=['id1', 'id2'], var_name=var_name) - result8 = melt(df, id_vars=['id1', 'id2'], - value_vars='A', var_name=var_name) - result9 = melt(df, id_vars=['id1', 'id2'], - value_vars=['A', 'B'], var_name=var_name) - - expected9 = DataFrame({'id1': df['id1'].tolist() * 2, - 'id2': df['id2'].tolist() * 2, - var_name: ['A']*10 + ['B']*10, - 'value': df['A'].tolist() + df['B'].tolist()}, - columns=['id1', 'id2', var_name, 'value']) - tm.assert_frame_equal(result9, expected9) - - # Supply custom name for the 'value' column - result10 = melt(df, value_name=value_name) - result11 = melt(df, id_vars=['id1'], value_name=value_name) - result12 = melt(df, id_vars=['id1', 'id2'], value_name=value_name) - result13 = melt(df, id_vars=['id1', 'id2'], - value_vars='A', value_name=value_name) - result14 = melt(df, id_vars=['id1', 'id2'], - value_vars=['A', 'B'], value_name=value_name) - - expected14 = DataFrame({'id1': df['id1'].tolist() * 2, - 'id2': df['id2'].tolist() * 2, - 'variable': ['A']*10 + ['B']*10, - value_name: df['A'].tolist() + df['B'].tolist()}, - columns=['id1', 'id2', 'variable', value_name]) - tm.assert_frame_equal(result14, expected14) - - # Supply custom names for the 'variable' and 'value' columns - result15 = melt(df, var_name=var_name, value_name=value_name) - result16 = melt(df, id_vars=['id1'], var_name=var_name, value_name=value_name) - result17 = melt(df, id_vars=['id1', 'id2'], - var_name=var_name, value_name=value_name) - result18 = melt(df, id_vars=['id1', 'id2'], - value_vars='A', var_name=var_name, value_name=value_name) - result19 = melt(df, id_vars=['id1', 'id2'], - value_vars=['A', 'B'], var_name=var_name, value_name=value_name) - - expected19 = DataFrame({'id1': df['id1'].tolist() * 2, - 'id2': df['id2'].tolist() * 2, - 
var_name: ['A']*10 + ['B']*10, - value_name: df['A'].tolist() + df['B'].tolist()}, - columns=['id1', 'id2', var_name, value_name]) - tm.assert_frame_equal(result19, expected19) - - df1 = df.copy() - df1.columns.name = 'foo' - result20 = melt(df1) - assert(result20.columns.tolist() == ['foo', 'value']) - -def test_convert_dummies(): - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - - result = convert_dummies(df, ['A', 'B']) - result2 = convert_dummies(df, ['A', 'B'], prefix_sep='.') - - expected = DataFrame({'A_foo': [1, 0, 1, 0, 1, 0, 1, 1], - 'A_bar': [0, 1, 0, 1, 0, 1, 0, 0], - 'B_one': [1, 1, 0, 0, 0, 0, 1, 0], - 'B_two': [0, 0, 1, 0, 1, 1, 0, 0], - 'B_three': [0, 0, 0, 1, 0, 0, 0, 1], - 'C': df['C'].values, - 'D': df['D'].values}, - columns=result.columns, dtype=float) - expected2 = expected.rename(columns=lambda x: x.replace('_', '.')) - - tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(result2, expected2) - - -class Test_lreshape(unittest.TestCase): +class TestMelt(unittest.TestCase): + + def setUp(self): + self.df = tm.makeTimeDataFrame()[:10] + self.df['id1'] = (self.df['A'] > 0).astype(np.int64) + self.df['id2'] = (self.df['B'] > 0).astype(np.int64) + + self.var_name = 'var' + self.value_name = 'val' + + def test_default_col_names(self): + result = melt(self.df) + self.assertEqual(result.columns.tolist(), ['variable', 'value']) + + result1 = melt(self.df, id_vars=['id1']) + self.assertEqual(result1.columns.tolist(), ['id1', 'variable', 'value']) + + result2 = melt(self.df, id_vars=['id1', 'id2']) + self.assertEqual(result2.columns.tolist(), ['id1', 'id2', 'variable', 'value']) + + def test_value_vars(self): + result3 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A') + self.assertEqual(len(result3), 10) + + result4 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B']) 
+ expected4 = DataFrame({'id1': self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + 'variable': ['A']*10 + ['B']*10, + 'value': self.df['A'].tolist() + self.df['B'].tolist()}, + columns=['id1', 'id2', 'variable', 'value']) + tm.assert_frame_equal(result4, expected4) + + def test_custom_var_name(self): + result5 = melt(self.df, var_name=self.var_name) + self.assertEqual(result5.columns.tolist(), ['var', 'value']) + + result6 = melt(self.df, id_vars=['id1'], var_name=self.var_name) + self.assertEqual(result6.columns.tolist(), ['id1', 'var', 'value']) + + result7 = melt(self.df, id_vars=['id1', 'id2'], var_name=self.var_name) + self.assertEqual(result7.columns.tolist(), ['id1', 'id2', 'var', 'value']) + + result8 = melt(self.df, id_vars=['id1', 'id2'], + value_vars='A', var_name=self.var_name) + self.assertEqual(result8.columns.tolist(), ['id1', 'id2', 'var', 'value']) + + result9 = melt(self.df, id_vars=['id1', 'id2'], + value_vars=['A', 'B'], var_name=self.var_name) + expected9 = DataFrame({'id1': self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + self.var_name: ['A']*10 + ['B']*10, + 'value': self.df['A'].tolist() + self.df['B'].tolist()}, + columns=['id1', 'id2', self.var_name, 'value']) + tm.assert_frame_equal(result9, expected9) + + def test_custom_value_name(self): + result10 = melt(self.df, value_name=self.value_name) + self.assertEqual(result10.columns.tolist(), ['variable', 'val']) + + result11 = melt(self.df, id_vars=['id1'], value_name=self.value_name) + self.assertEqual(result11.columns.tolist(), ['id1', 'variable', 'val']) + + result12 = melt(self.df, id_vars=['id1', 'id2'], value_name=self.value_name) + self.assertEqual(result12.columns.tolist(), ['id1', 'id2', 'variable', 'val']) + + result13 = melt(self.df, id_vars=['id1', 'id2'], + value_vars='A', value_name=self.value_name) + self.assertEqual(result13.columns.tolist(), ['id1', 'id2', 'variable', 'val']) + + result14 = melt(self.df, id_vars=['id1', 'id2'], + 
value_vars=['A', 'B'], value_name=self.value_name) + expected14 = DataFrame({'id1': self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + 'variable': ['A']*10 + ['B']*10, + self.value_name: self.df['A'].tolist() + self.df['B'].tolist()}, + columns=['id1', 'id2', 'variable', self.value_name]) + tm.assert_frame_equal(result14, expected14) + + def test_custom_var_and_value_name(self): + + result15 = melt(self.df, var_name=self.var_name, value_name=self.value_name) + self.assertEqual(result15.columns.tolist(), ['var', 'val']) + + result16 = melt(self.df, id_vars=['id1'], var_name=self.var_name, value_name=self.value_name) + self.assertEqual(result16.columns.tolist(), ['id1', 'var', 'val']) + + result17 = melt(self.df, id_vars=['id1', 'id2'], + var_name=self.var_name, value_name=self.value_name) + self.assertEqual(result17.columns.tolist(), ['id1', 'id2', 'var', 'val']) + + result18 = melt(self.df, id_vars=['id1', 'id2'], + value_vars='A', var_name=self.var_name, value_name=self.value_name) + self.assertEqual(result18.columns.tolist(), ['id1', 'id2', 'var', 'val']) + + result19 = melt(self.df, id_vars=['id1', 'id2'], + value_vars=['A', 'B'], var_name=self.var_name, value_name=self.value_name) + expected19 = DataFrame({'id1': self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + self.var_name: ['A']*10 + ['B']*10, + self.value_name: self.df['A'].tolist() + self.df['B'].tolist()}, + columns=['id1', 'id2', self.var_name, self.value_name]) + tm.assert_frame_equal(result19, expected19) + + def test_var_name_from_columns_name(self): + self.df.columns.name = 'foo' + result20 = melt(self.df) + self.assertEqual(result20.columns.tolist(), ['foo', 'value']) + +class TestConvertDummies(unittest.TestCase): + def test_convert_dummies(self): + df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + result = 
convert_dummies(df, ['A', 'B']) + result2 = convert_dummies(df, ['A', 'B'], prefix_sep='.') + + expected = DataFrame({'A_foo': [1, 0, 1, 0, 1, 0, 1, 1], + 'A_bar': [0, 1, 0, 1, 0, 1, 0, 0], + 'B_one': [1, 1, 0, 0, 0, 0, 1, 0], + 'B_two': [0, 0, 1, 0, 1, 1, 0, 0], + 'B_three': [0, 0, 0, 1, 0, 0, 0, 1], + 'C': df['C'].values, + 'D': df['D'].values}, + columns=result.columns, dtype=float) + expected2 = expected.rename(columns=lambda x: x.replace('_', '.')) + + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected2) + + +class TestLreshape(unittest.TestCase): def test_pairs(self): data = {'birthdt': ['08jan2009', '20dec2008', '30dec2008', From 920ef814e3b7fcecb3e4de9d4107f6e4753fe531 Mon Sep 17 00:00:00 2001 From: Andy Hayden Date: Sat, 6 Jul 2013 01:31:39 +0100 Subject: [PATCH 3/3] DOC add release note about melt columns.name default behaviour --- doc/source/release.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 7a271688c318b..a2f2d5308ff4d 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -75,13 +75,14 @@ pandas 0.12 - Simplified the API and added a describe method to Categorical - ``melt`` now accepts the optional parameters ``var_name`` and ``value_name`` to specify custom column names of the returned DataFrame (:issue:`3649`), - thanks @hoechenberger + thanks @hoechenberger. If ``var_name`` is not specified and ``dataframe.columns.name`` + is not None, then this will be used as the ``var_name`` (:issue:`4144`). - clipboard functions use pyperclip (no dependencies on Windows, alternative dependencies offered for Linux) (:issue:`3837`). 
- Plotting functions now raise a ``TypeError`` before trying to plot anything if the associated objects have have a dtype of ``object`` (:issue:`1818`, - :issue:`3572`, :issue:`3911`, :issue:`3912`), but they will try to convert object arrays to - numeric arrays if possible so that you can still plot, for example, an + :issue:`3572`, :issue:`3911`, :issue:`3912`), but they will try to convert object + arrays to numeric arrays if possible so that you can still plot, for example, an object array with floats. This happens before any drawing takes place which elimnates any spurious plots from showing up. - Added Faq section on repr display options, to help users customize their setup.