Skip to content

Commit

Permalink
BUG: Bug in a groupby of a non-lexsorted MultiIndex and multiple grouping levels
Browse files Browse the repository at this point in the history

closes pandas-dev#14776
  • Loading branch information
jreback committed Nov 30, 2016
1 parent 2bd9c95 commit cf31905
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 2 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.19.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ Bug Fixes
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when ``skipfooter`` was not being respected by Python's CSV library (:issue:`13879`)



- Bug in ``.groupby(..., sort=True)`` of a non-lexsorted MultiIndex when grouping with multiple levels (:issue:`14776`)



Expand Down
12 changes: 11 additions & 1 deletion pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,7 +861,17 @@ def reset_identity(values):
if isinstance(result, Series):
result = result.reindex(ax)
else:
result = result.reindex_axis(ax, axis=self.axis)

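# This is a very unfortunate special case (GH 14776):
# the axis is a MultiIndex that is NOT lexsorted and
# contains duplicated entries, so we cannot reindex.
# Instead, look up the positions of the axis values in the
# result's index and take the unique positions along the axis.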
if isinstance(ax, MultiIndex) and not ax.is_unique:
result = result.take(result.index.get_indexer_for(
ax.values).unique(), axis=self.axis)
else:
result = result.reindex_axis(ax, axis=self.axis)

elif self.group_keys:

Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -4736,6 +4736,25 @@ def test_groupby_multiindex_not_lexsorted(self):
result = not_lexsorted_df.groupby('a').mean()
tm.assert_frame_equal(expected, result)

# a transforming function should work regardless of sort
# GH 14776
df = DataFrame({'x': ['a', 'a', 'b', 'a'],
'y': [1, 1, 2, 2],
'z': [1, 2, 3, 4]}).set_index(['x', 'y'])
self.assertFalse(df.index.is_lexsorted())

for level in [0, 1, [0, 1]]:
for sort in [False, True]:
result = df.groupby(level=level, sort=sort).apply(
DataFrame.drop_duplicates)
expected = df
tm.assert_frame_equal(expected, result)

result = df.sort_index().groupby(level=level, sort=sort).apply(
DataFrame.drop_duplicates)
expected = df.sort_index()
tm.assert_frame_equal(expected, result)

def test_groupby_levels_and_columns(self):
# GH9344, GH9049
idx_names = ['x', 'y']
Expand Down

0 comments on commit cf31905

Please sign in to comment.