Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions doc/release/upcoming_changes/30822.improvement.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Bugfix for ``np.unique`` with ``equal_nan=True`` and ``axis != None``
------------------------------------------------------------------------

Previously, ``np.unique`` returned wrong results when ``equal_nan=True`` was combined with an ``axis`` other than ``None``: entries that should have been treated as equal because of NaNs were not merged. For example, ``np.unique([np.nan, np.nan], axis=0, equal_nan=True)`` returned ``array([nan, nan])`` and ``np.unique([[0, np.nan], [0, np.nan]], axis=0, equal_nan=True)`` returned ``array([[ 0., nan], [ 0., nan]])``. For more examples, see the list of fixed issues below.

Fixed issues: gh-29336, gh-23286, gh-20873.

Now, the behavior is fixed. For instance, the first example returns ``array([nan])`` and the second returns ``array([[ 0., nan]])``.

Note: this bugfix respects the previously established behavior of complex numbers with ``equal_nan=True``. Two values ``x`` and ``y`` are treated as equal when ``(x == y) | (is_nan(x) & is_nan(y))`` holds, so, for instance, ``0+nanj`` is considered equal to ``1+nanj`` and to ``nan+3j``.
14 changes: 13 additions & 1 deletion numpy/lib/_arraysetops_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,19 @@ def _unique1d(ar, return_index=False, return_inverse=False,
aux = ar
mask = np.empty(aux.shape, dtype=np.bool)
mask[:1] = True
if (equal_nan and aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and
if (equal_nan and aux.shape[0] > 0 and aux.dtype.names is not None and
any(aux[name].dtype.kind in "cfmM" for name in aux.dtype.names)):
# Structured dtype: compare field by field, treating NaNs as equal
mask[1:] = np.zeros(aux.shape[0] - 1, dtype=np.bool)
for name in aux.dtype.names:
col = aux[name]
col_diff = col[1:] != col[:-1]
if col.dtype.kind in "cfmM":
# Floating, complex, or datetime/timedelta: handle NaN values
is_nan = np.isnan(col)
col_diff = col_diff & ~(is_nan[1:] & is_nan[:-1])
mask[1:] = mask[1:] | col_diff
elif (equal_nan and aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and
np.isnan(aux[-1])):
if aux.dtype.kind == "c": # for complex all NaNs are considered equivalent
aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left')
Expand Down
31 changes: 31 additions & 0 deletions numpy/lib/tests/test_arraysetops.py
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,37 @@ def test_unique_zero_sized(self):
c = np.array([], np.int64)
self.check_all(a, b, i1, i2, c, dt)

def test_unique_nan_with_axis(self):
    """Check NaN de-duplication in ``np.unique`` when ``axis=0`` is given.

    Every scenario is ``(keyword arguments, input, expected output)``.
    The inputs come from the referenced issue reports plus one extra
    2-D case.
    """
    nan = np.nan
    scenarios = [
        # issue 23286
        (
            {"equal_nan": True, "axis": 0},
            [0., nan, 2., nan, 2., 1., 0., 1., 2., 0.],
            [0., 1., 2., nan],
        ),
        # issue 20873
        (
            {"equal_nan": True, "axis": 0},
            [[nan, 2.], [nan, 2.]],
            [[nan, 2.]],
        ),
        # issue 29336 (2-D input)
        (
            {"axis": 0, "equal_nan": True},
            [[nan, 0, 0], [nan, 0, 0]],
            [[nan, 0, 0]],
        ),
        # issue 29336 (1-D input)
        (
            {"axis": 0, "equal_nan": True},
            [nan, 0, 0, nan],
            [0, nan],
        ),
        # Extra case: `equal_nan` is deliberately not passed here
        (
            {"axis": 0},
            [[0, nan, 2], [0, nan, 1], [0, nan, 2]],
            [[0, nan, 1], [0, nan, 2]],
        ),
    ]

    for options, values, wanted in scenarios:
        outcome = np.unique(np.array(values), **options)
        assert_equal(outcome, np.array(wanted))

def test_unique_subclass(self):
class Subclass(np.ndarray):
pass
Expand Down