Skip to content

Commit

Permalink
BUG: Preserve data order when stacking unsorted levels (pandas-dev#16323)
Browse files Browse the repository at this point in the history
  • Loading branch information
dsm054 authored and stangirala committed Jun 11, 2017
1 parent b089c78 commit f1b03f6
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 2 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.20.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ Sparse
Reshaping
^^^^^^^^^


- Bug in ``DataFrame.stack`` with unsorted levels in ``MultiIndex`` columns, where the order of the data was not preserved (:issue:`16323`)


Numeric
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,7 @@ def _convert_level_number(level_num, columns):
new_labels = [np.arange(N).repeat(levsize)]
new_names = [this.index.name] # something better?

new_levels.append(frame.columns.levels[level_num])
new_levels.append(level_vals)
new_labels.append(np.tile(level_labels, N))
new_names.append(frame.columns.names[level_num])

Expand Down
31 changes: 31 additions & 0 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1193,6 +1193,37 @@ def test_unstack_unobserved_keys(self):
recons = result.stack()
tm.assert_frame_equal(recons, df)

def test_stack_order_with_unsorted_levels(self):
# Regression test: stacking one level of two-level MultiIndex columns whose
# levels are not stored in sorted order must keep every value attached to
# its original (row, column) labels.
# GH 16323

# Cell-by-cell comparison of a frame with its stacked form: the value at
# df.loc[row, col] must be found in df_stacked at index (row, col[lev0])
# and column col[lev1]. Callers pass the stacked column level as lev0 and
# the level that remains in the columns as lev1.
def manual_compare_stacked(df, df_stacked, lev0, lev1):
assert all(df.loc[row, col] ==
df_stacked.loc[(row, col[lev0]), col[lev1]]
for row in df.index for col in df.columns)

# deep check for 1-row case
for width in [2, 3]:
# Every ordered pair of level orderings: one permutation of `width`
# values drawn from [0, 1, 2] for each of the two column levels.
levels_poss = itertools.product(
itertools.permutations([0, 1, 2], width),
repeat=2)

for levels in levels_poss:
# Four columns: every combination of the first two entries of each level
# (only label positions 0 and 1 are referenced).
columns = MultiIndex(levels=levels,
labels=[[0, 0, 1, 1],
[0, 1, 0, 1]])
# Single row holding 0..3, so each column carries a distinct value.
df = DataFrame(columns=columns, data=[range(4)])
# Stack each of the two column levels in turn; the other level stays in
# the columns of the result.
for stack_lev in range(2):
df_stacked = df.stack(stack_lev)
manual_compare_stacked(df, df_stacked,
stack_lev, 1 - stack_lev)

# check multi-row case
# 3 x 3 = 9 columns built from string levels stored out of alphabetical
# order, with 5 rows of distinct consecutive integers.
mi = MultiIndex(levels=[["A", "C", "B"], ["B", "A", "C"]],
labels=[np.repeat(range(3), 3), np.tile(range(3), 3)])
df = DataFrame(columns=mi, index=range(5),
data=np.arange(5 * len(mi)).reshape(5, -1))
# Only stacking of level 0 is verified for the multi-row frame.
manual_compare_stacked(df, df.stack(0), 0, 1)

def test_groupby_corner(self):
midx = MultiIndex(levels=[['foo'], ['bar'], ['baz']],
labels=[[0], [0], [0]],
Expand Down

0 comments on commit f1b03f6

Please sign in to comment.