Skip to content

Commit

Permalink
Merge pull request #4144 from hayd/melt_name
Browse files Browse the repository at this point in the history
ENH melt uses column name if available
  • Loading branch information
hayd committed Jul 6, 2013
2 parents 565ee0c + 920ef81 commit 0378de5
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 105 deletions.
7 changes: 4 additions & 3 deletions doc/source/release.rst
Expand Up @@ -75,13 +75,14 @@ pandas 0.12
- Simplified the API and added a describe method to Categorical
- ``melt`` now accepts the optional parameters ``var_name`` and ``value_name``
to specify custom column names of the returned DataFrame (:issue:`3649`),
thanks @hoechenberger
thanks @hoechenberger. If ``var_name`` is not specified and ``dataframe.columns.name``
is not None, then this will be used as the ``var_name`` (:issue:`4144`).
- clipboard functions use pyperclip (no dependencies on Windows, alternative
dependencies offered for Linux) (:issue:`3837`).
- Plotting functions now raise a ``TypeError`` before trying to plot anything
if the associated objects have a dtype of ``object`` (:issue:`1818`,
:issue:`3572`, :issue:`3911`, :issue:`3912`), but they will try to convert object arrays to
numeric arrays if possible so that you can still plot, for example, an
:issue:`3572`, :issue:`3911`, :issue:`3912`), but they will try to convert object
arrays to numeric arrays if possible so that you can still plot, for example, an
object array with floats. This happens before any drawing takes place which
eliminates any spurious plots from showing up.
- Added Faq section on repr display options, to help users customize their setup.
Expand Down
10 changes: 7 additions & 3 deletions pandas/core/reshape.py
Expand Up @@ -601,7 +601,7 @@ def _stack_multi_columns(frame, level=-1, dropna=True):


def melt(frame, id_vars=None, value_vars=None,
var_name='variable', value_name='value'):
var_name=None, value_name='value'):
"""
"Unpivots" a DataFrame from wide format to long format, optionally leaving
id variables set
Expand All @@ -611,8 +611,8 @@ def melt(frame, id_vars=None, value_vars=None,
frame : DataFrame
id_vars : tuple, list, or ndarray
value_vars : tuple, list, or ndarray
var_name : scalar
value_name : scalar
var_name : scalar, if None uses frame.columns.name or 'variable'
value_name : scalar, default 'value'
Examples
--------
Expand All @@ -634,6 +634,7 @@ def melt(frame, id_vars=None, value_vars=None,
a B 1
b B 3
c B 5
"""
# TODO: what about the existing index?
if id_vars is not None:
Expand All @@ -651,6 +652,9 @@ def melt(frame, id_vars=None, value_vars=None,
else:
frame = frame.copy()

if var_name is None:
var_name = frame.columns.name if frame.columns.name is not None else 'variable'

N, K = frame.shape
K -= len(id_vars)

Expand Down
234 changes: 135 additions & 99 deletions pandas/tests/test_reshape.py
Expand Up @@ -20,105 +20,141 @@
_multiprocess_can_split_ = True


def test_melt():
    """Check ``melt`` with default and custom 'variable'/'value' column names.

    Every naming combination is run against the same five id_vars/value_vars
    variants; the first four are smoke calls and the last one (both id
    columns, value_vars ['A', 'B']) is compared against a hand-built frame.
    """
    frame = tm.makeTimeDataFrame()[:10]
    frame['id1'] = (frame['A'] > 0).astype(np.int64)
    frame['id2'] = (frame['B'] > 0).astype(np.int64)

    var_name = 'var'
    value_name = 'val'

    def melt_variants(**names):
        # The first four calls only verify that melt runs; the result of the
        # fifth is handed back for comparison.
        melt(frame, **names)
        melt(frame, id_vars=['id1'], **names)
        melt(frame, id_vars=['id1', 'id2'], **names)
        melt(frame, id_vars=['id1', 'id2'], value_vars='A', **names)
        return melt(frame, id_vars=['id1', 'id2'],
                    value_vars=['A', 'B'], **names)

    def expected_frame(variable_col, value_col):
        # 'A' and 'B' stacked on top of each other with the id columns
        # repeated once per melted column.
        data = {'id1': frame['id1'].tolist() * 2,
                'id2': frame['id2'].tolist() * 2,
                variable_col: ['A']*10 + ['B']*10,
                value_col: frame['A'].tolist() + frame['B'].tolist()}
        return DataFrame(data,
                         columns=['id1', 'id2', variable_col, value_col])

    # Default column names
    melted = melt_variants()
    tm.assert_frame_equal(melted, expected_frame('variable', 'value'))

    # Supply custom name for the 'variable' column
    melted = melt_variants(var_name=var_name)
    tm.assert_frame_equal(melted, expected_frame(var_name, 'value'))

    # Supply custom name for the 'value' column
    melted = melt_variants(value_name=value_name)
    tm.assert_frame_equal(melted, expected_frame('variable', value_name))

    # Supply custom names for the 'variable' and 'value' columns
    melted = melt_variants(var_name=var_name, value_name=value_name)
    tm.assert_frame_equal(melted, expected_frame(var_name, value_name))


def test_convert_dummies():
    """Check ``convert_dummies`` output with the default and a '.' separator."""
    source = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                              'foo', 'bar', 'foo', 'foo'],
                        'B': ['one', 'one', 'two', 'three',
                              'two', 'two', 'one', 'three'],
                        'C': np.random.randn(8),
                        'D': np.random.randn(8)})

    underscored = convert_dummies(source, ['A', 'B'])
    dotted = convert_dummies(source, ['A', 'B'], prefix_sep='.')

    # Indicator columns for every level of 'A' and 'B'; 'C' and 'D' pass
    # through unchanged.
    indicator_data = {'A_foo': [1, 0, 1, 0, 1, 0, 1, 1],
                      'A_bar': [0, 1, 0, 1, 0, 1, 0, 0],
                      'B_one': [1, 1, 0, 0, 0, 0, 1, 0],
                      'B_two': [0, 0, 1, 0, 1, 1, 0, 0],
                      'B_three': [0, 0, 0, 1, 0, 0, 0, 1],
                      'C': source['C'].values,
                      'D': source['D'].values}
    want_underscored = DataFrame(indicator_data,
                                 columns=underscored.columns, dtype=float)
    want_dotted = want_underscored.rename(
        columns=lambda label: label.replace('_', '.'))

    tm.assert_frame_equal(underscored, want_underscored)
    tm.assert_frame_equal(dotted, want_dotted)



class Test_lreshape(unittest.TestCase):
class TestMelt(unittest.TestCase):

def setUp(self):
self.df = tm.makeTimeDataFrame()[:10]
self.df['id1'] = (self.df['A'] > 0).astype(np.int64)
self.df['id2'] = (self.df['B'] > 0).astype(np.int64)

self.var_name = 'var'
self.value_name = 'val'

def test_default_col_names(self):
result = melt(self.df)
self.assertEqual(result.columns.tolist(), ['variable', 'value'])

result1 = melt(self.df, id_vars=['id1'])
self.assertEqual(result1.columns.tolist(), ['id1', 'variable', 'value'])

result2 = melt(self.df, id_vars=['id1', 'id2'])
self.assertEqual(result2.columns.tolist(), ['id1', 'id2', 'variable', 'value'])

def test_value_vars(self):
result3 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A')
self.assertEqual(len(result3), 10)

result4 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'])
expected4 = DataFrame({'id1': self.df['id1'].tolist() * 2,
'id2': self.df['id2'].tolist() * 2,
'variable': ['A']*10 + ['B']*10,
'value': self.df['A'].tolist() + self.df['B'].tolist()},
columns=['id1', 'id2', 'variable', 'value'])
tm.assert_frame_equal(result4, expected4)

def test_custom_var_name(self):
result5 = melt(self.df, var_name=self.var_name)
self.assertEqual(result5.columns.tolist(), ['var', 'value'])

result6 = melt(self.df, id_vars=['id1'], var_name=self.var_name)
self.assertEqual(result6.columns.tolist(), ['id1', 'var', 'value'])

result7 = melt(self.df, id_vars=['id1', 'id2'], var_name=self.var_name)
self.assertEqual(result7.columns.tolist(), ['id1', 'id2', 'var', 'value'])

result8 = melt(self.df, id_vars=['id1', 'id2'],
value_vars='A', var_name=self.var_name)
self.assertEqual(result8.columns.tolist(), ['id1', 'id2', 'var', 'value'])

result9 = melt(self.df, id_vars=['id1', 'id2'],
value_vars=['A', 'B'], var_name=self.var_name)
expected9 = DataFrame({'id1': self.df['id1'].tolist() * 2,
'id2': self.df['id2'].tolist() * 2,
self.var_name: ['A']*10 + ['B']*10,
'value': self.df['A'].tolist() + self.df['B'].tolist()},
columns=['id1', 'id2', self.var_name, 'value'])
tm.assert_frame_equal(result9, expected9)

def test_custom_value_name(self):
result10 = melt(self.df, value_name=self.value_name)
self.assertEqual(result10.columns.tolist(), ['variable', 'val'])

result11 = melt(self.df, id_vars=['id1'], value_name=self.value_name)
self.assertEqual(result11.columns.tolist(), ['id1', 'variable', 'val'])

result12 = melt(self.df, id_vars=['id1', 'id2'], value_name=self.value_name)
self.assertEqual(result12.columns.tolist(), ['id1', 'id2', 'variable', 'val'])

result13 = melt(self.df, id_vars=['id1', 'id2'],
value_vars='A', value_name=self.value_name)
self.assertEqual(result13.columns.tolist(), ['id1', 'id2', 'variable', 'val'])

result14 = melt(self.df, id_vars=['id1', 'id2'],
value_vars=['A', 'B'], value_name=self.value_name)
expected14 = DataFrame({'id1': self.df['id1'].tolist() * 2,
'id2': self.df['id2'].tolist() * 2,
'variable': ['A']*10 + ['B']*10,
self.value_name: self.df['A'].tolist() + self.df['B'].tolist()},
columns=['id1', 'id2', 'variable', self.value_name])
tm.assert_frame_equal(result14, expected14)

def test_custom_var_and_value_name(self):

result15 = melt(self.df, var_name=self.var_name, value_name=self.value_name)
self.assertEqual(result15.columns.tolist(), ['var', 'val'])

result16 = melt(self.df, id_vars=['id1'], var_name=self.var_name, value_name=self.value_name)
self.assertEqual(result16.columns.tolist(), ['id1', 'var', 'val'])

result17 = melt(self.df, id_vars=['id1', 'id2'],
var_name=self.var_name, value_name=self.value_name)
self.assertEqual(result17.columns.tolist(), ['id1', 'id2', 'var', 'val'])

result18 = melt(df, id_vars=['id1', 'id2'],
value_vars='A', var_name=self.var_name, value_name=self.value_name)
self.assertEqual(result18.columns.tolist(), ['id1', 'id2', 'var', 'val'])

result19 = melt(self.df, id_vars=['id1', 'id2'],
value_vars=['A', 'B'], var_name=self.var_name, value_name=self.value_name)
expected19 = DataFrame({'id1': self.df['id1'].tolist() * 2,
'id2': self.df['id2'].tolist() * 2,
var_name: ['A']*10 + ['B']*10,
value_name: self.df['A'].tolist() + self.df['B'].tolist()},
columns=['id1', 'id2', self.var_name, self.value_name])
tm.assert_frame_equal(result19, expected19)

def test_custom_var_and_value_name(self):
self.df.columns.name = 'foo'
result20 = melt(self.df)
self.assertEqual(result20.columns.tolist(), ['foo', 'value'])

class TestConvertDummies(unittest.TestCase):
    """Tests for ``convert_dummies``."""

    def test_convert_dummies(self):
        """Compare output for the default separator and for ``prefix_sep='.'``."""
        source = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                                  'foo', 'bar', 'foo', 'foo'],
                            'B': ['one', 'one', 'two', 'three',
                                  'two', 'two', 'one', 'three'],
                            'C': np.random.randn(8),
                            'D': np.random.randn(8)})

        underscored = convert_dummies(source, ['A', 'B'])
        dotted = convert_dummies(source, ['A', 'B'], prefix_sep='.')

        # Indicator columns for every level of 'A' and 'B'; 'C' and 'D'
        # pass through unchanged.
        indicator_data = {'A_foo': [1, 0, 1, 0, 1, 0, 1, 1],
                          'A_bar': [0, 1, 0, 1, 0, 1, 0, 0],
                          'B_one': [1, 1, 0, 0, 0, 0, 1, 0],
                          'B_two': [0, 0, 1, 0, 1, 1, 0, 0],
                          'B_three': [0, 0, 0, 1, 0, 0, 0, 1],
                          'C': source['C'].values,
                          'D': source['D'].values}
        want_underscored = DataFrame(indicator_data,
                                     columns=underscored.columns,
                                     dtype=float)
        want_dotted = want_underscored.rename(
            columns=lambda label: label.replace('_', '.'))

        tm.assert_frame_equal(underscored, want_underscored)
        tm.assert_frame_equal(dotted, want_dotted)


class TestLreshape(unittest.TestCase):

def test_pairs(self):
data = {'birthdt': ['08jan2009', '20dec2008', '30dec2008',
Expand Down

0 comments on commit 0378de5

Please sign in to comment.