Permalink
Browse files

ENH: speed up in1d() when ar1 is much longer than ar2 (len(ar1) >> len(ar2)). Closes #1603.

A timing script justifying the switching criterion is attached to ticket 1603.

Thanks to Neil Crighton.
  • Loading branch information...
rgommers committed May 29, 2011
1 parent 3071eab commit 6441c2a788d0cc2a45c5e8a3ef0891ca4e42d96e
Showing with 59 additions and 55 deletions.
  1. +10 −2 numpy/lib/arraysetops.py
  2. +49 −53 numpy/lib/tests/test_arraysetops.py
View
@@ -277,7 +277,7 @@ def setxor1d(ar1, ar2, assume_unique=False):
def in1d(ar1, ar2, assume_unique=False):
"""
Test whether each element of a 1D array is also present in a second array.
Test whether each element of a 1-D array is also present in a second array.
Returns a boolean array the same length as `ar1` that is True
where an element of `ar1` is in `ar2` and False otherwise.
@@ -305,7 +305,7 @@ def in1d(ar1, ar2, assume_unique=False):
Notes
-----
`in1d` can be considered as an element-wise function version of the
python keyword `in`, for 1D sequences. ``in1d(a, b)`` is roughly
python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
equivalent to ``np.array([item in b for item in a])``.
.. versionadded:: 1.4.0
@@ -321,6 +321,14 @@ def in1d(ar1, ar2, assume_unique=False):
array([0, 2, 0])
"""
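# When ar2 is short relative to ar1, OR-ing one elementwise comparison per
# element of ar2 is significantly faster than the sort-based approach below.
# The switching threshold is empirical (timing script on ticket #1603).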
if len(ar2) < 10 * len(ar1) ** 0.145:
mask = np.zeros(len(ar1), dtype=np.bool)
for a in ar2:
mask |= (ar1 == a)
return mask
# Otherwise use sorting
if not assume_unique:
ar1, rev_idx = np.unique(ar1, return_inverse=True)
ar2 = np.unique(ar2)
@@ -90,66 +90,62 @@ def test_ediff1d(self):
assert_array_equal([1],ediff1d(two_elem))
def test_in1d(self):
a = np.array( [5, 7, 1, 2] )
b = np.array( [2, 4, 3, 1, 5] )
ec = np.array( [True, False, True, True] )
c = in1d( a, b, assume_unique=True )
assert_array_equal( c, ec )
a[0] = 8
ec = np.array( [False, False, True, True] )
c = in1d( a, b, assume_unique=True )
assert_array_equal( c, ec )
a[0], a[3] = 4, 8
ec = np.array( [True, False, True, False] )
c = in1d( a, b, assume_unique=True )
assert_array_equal( c, ec )
a = np.array([5,4,5,3,4,4,3,4,3,5,2,1,5,5])
b = [2,3,4]
ec = [False, True, False, True, True, True, True, True, True, False,
True, False, False, False]
c = in1d(a, b)
assert_array_equal(c, ec)
b = b + [5, 5, 4]
ec = [True, True, True, True, True, True, True, True, True, True,
True, False, True, True]
c = in1d(a, b)
assert_array_equal(c, ec)
a = np.array([5, 7, 1, 2])
b = np.array([2, 4, 3, 1, 5])
ec = np.array([True, False, True, True])
c = in1d(a, b)
assert_array_equal(c, ec)
a = np.array([5, 7, 1, 1, 2])
b = np.array([2, 4, 3, 3, 1, 5])
ec = np.array([True, False, True, True, True])
c = in1d(a, b)
assert_array_equal(c, ec)
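# Run every case with two sizes of the b array (mult = 1 and mult = 10) so
# that both code paths in in1d() are exercised: the comparison loop used
# for a short ar2 and the sorting-based path used otherwise.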
for mult in (1, 10):
a = np.array([5, 7, 1, 2])
b = np.array([2, 4, 3, 1, 5] * mult)
ec = np.array([True, False, True, True])
c = in1d(a, b, assume_unique=True)
assert_array_equal(c, ec)
a[0] = 8
ec = np.array([False, False, True, True])
c = in1d(a, b, assume_unique=True)
assert_array_equal(c, ec)
a[0], a[3] = 4, 8
ec = np.array([True, False, True, False])
c = in1d(a, b, assume_unique=True)
assert_array_equal(c, ec)
a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
b = [2, 3, 4] * mult
ec = [False, True, False, True, True, True, True, True, True, False,
True, False, False, False]
c = in1d(a, b)
assert_array_equal(c, ec)
b = b + [5, 5, 4] * mult
ec = [True, True, True, True, True, True, True, True, True, True,
True, False, True, True]
c = in1d(a, b)
assert_array_equal(c, ec)
a = np.array([5, 7, 1, 2])
b = np.array([2, 4, 3, 1, 5] * mult)
ec = np.array([True, False, True, True])
c = in1d(a, b)
assert_array_equal(c, ec)
a = np.array([5, 7, 1, 1, 2])
b = np.array([2, 4, 3, 3, 1, 5] * mult)
ec = np.array([True, False, True, True, True])
c = in1d(a, b)
assert_array_equal(c, ec)
a = np.array([5, 5])
b = np.array([2, 2] * mult)
ec = np.array([False, False])
c = in1d(a, b)
assert_array_equal(c, ec)
a = np.array([5])
b = np.array([2])
ec = np.array([False])
c = in1d(a, b)
assert_array_equal(c, ec)
a = np.array([5, 5])
b = np.array([2, 2])
ec = np.array([False, False])
c = in1d(a, b)
assert_array_equal(c, ec)
assert_array_equal(in1d([], []), [])
def test_in1d_char_array( self ):

0 comments on commit 6441c2a

Please sign in to comment.