Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: unstack with sort=False fails when used with the level parameter… #56357

Merged
merged 11 commits into from
May 21, 2024
32 changes: 23 additions & 9 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import collections
import itertools
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -162,8 +163,13 @@ def _indexer_and_to_sort(
]:
v = self.level

codes = list(self.index.codes)
levs = list(self.index.levels)
codes = list(self.index.codes)

if not self.sort:
# Create new codes considering that labels are already sorted
codes = [np.array(factorize(code)[0], dtype=code.dtype) for code in codes]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you don't need to wrap this with `np.array(...)` — is that right?


to_sort = codes[:v] + codes[v + 1 :] + [codes[v]]
sizes = tuple(len(x) for x in levs[:v] + levs[v + 1 :] + [levs[v]])

Expand All @@ -174,25 +180,33 @@ def _indexer_and_to_sort(
return indexer, to_sort

@cache_readonly
def sorted_labels(self) -> list[np.ndarray]:
def labels(self) -> list[np.ndarray]:
indexer, to_sort = self._indexer_and_to_sort
if self.sort:
return [line.take(indexer) for line in to_sort]
return to_sort

def _make_sorted_values(self, values: np.ndarray) -> np.ndarray:
@cache_readonly
def sorted_labels(self) -> list[np.ndarray]:
if self.sort:
indexer, _ = self._indexer_and_to_sort
return self.labels
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems a bit confusing to me: what was `sorted_labels` has become `labels`, and the new `sorted_labels` returns the same result as `labels` when `sort=True`. If there are good reasons behind these names, maybe add a short docstring to make that clear? Otherwise, perhaps a renaming is in order.


sorted_values = algos.take_nd(values, indexer, axis=0)
return sorted_values
return values
v = self.level
codes = list(self.index.codes)
to_sort = codes[:v] + codes[v + 1 :] + [codes[v]]
return to_sort

def _make_sorted_values(self, values: np.ndarray) -> np.ndarray:
indexer, _ = self._indexer_and_to_sort
sorted_values = algos.take_nd(values, indexer, axis=0)
return sorted_values

def _make_selectors(self):
new_levels = self.new_index_levels

# make the mask
remaining_labels = self.sorted_labels[:-1]
remaining_labels = self.labels[:-1]
choosen_labels = self.labels[-1]
level_sizes = tuple(len(x) for x in new_levels)

comp_index, obs_ids = get_compressed_ids(remaining_labels, level_sizes)
Expand All @@ -202,7 +216,7 @@ def _make_selectors(self):
stride = self.index.levshape[self.level] + self.lift
self.full_shape = ngroups, stride

selector = self.sorted_labels[-1] + stride * comp_index + self.lift
selector = choosen_labels + stride * comp_index + self.lift
mask = np.zeros(np.prod(self.full_shape), dtype=bool)
mask.put(selector, True)

Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -1318,6 +1318,21 @@ def test_unstack_sort_false(frame_or_series, dtype):
[("two", "z", "b"), ("two", "y", "a"), ("one", "z", "b"), ("one", "y", "a")]
)
obj = frame_or_series(np.arange(1.0, 5.0), index=index, dtype=dtype)

result = obj.unstack(level=0, sort=False)

if frame_or_series is DataFrame:
expected_columns = MultiIndex.from_tuples([(0, "two"), (0, "one")])
else:
expected_columns = ["two", "one"]
expected = DataFrame(
[[1.0, 3.0], [2.0, 4.0]],
index=MultiIndex.from_tuples([("z", "b"), ("y", "a")]),
columns=expected_columns,
dtype=dtype,
)
tm.assert_frame_equal(result, expected)

result = obj.unstack(level=-1, sort=False)

if frame_or_series is DataFrame:
Expand Down
Loading