Skip to content

Commit

Permalink
ENH: speed up in1d() in the case where ar1 is much longer than ar2 (len(ar1) >> len(ar2)). Closes #1603.
Browse files: browse the repository at this point in the history
A timing script justifying the switching criterion is attached to ticket 1603.

Thanks to Neil Crighton.
  • Loading branch information
rgommers committed May 29, 2011
1 parent 3071eab commit 6441c2a
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 55 deletions.
12 changes: 10 additions & 2 deletions numpy/lib/arraysetops.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def setxor1d(ar1, ar2, assume_unique=False):

def in1d(ar1, ar2, assume_unique=False):
"""
Test whether each element of a 1D array is also present in a second array.
Test whether each element of a 1-D array is also present in a second array.
Returns a boolean array the same length as `ar1` that is True
where an element of `ar1` is in `ar2` and False otherwise.
Expand Down Expand Up @@ -305,7 +305,7 @@ def in1d(ar1, ar2, assume_unique=False):
Notes
-----
`in1d` can be considered as an element-wise function version of the
python keyword `in`, for 1D sequences. ``in1d(a, b)`` is roughly
python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
equivalent to ``np.array([item in b for item in a])``.
.. versionadded:: 1.4.0
Expand All @@ -321,6 +321,14 @@ def in1d(ar1, ar2, assume_unique=False):
array([0, 2, 0])
"""
# When ar2 is small relative to ar1 (the condition below), looping over the
# elements of ar2 is significantly faster than the sorting approach used otherwise.
if len(ar2) < 10 * len(ar1) ** 0.145:
mask = np.zeros(len(ar1), dtype=np.bool)
for a in ar2:
mask |= (ar1 == a)
return mask

# Otherwise use sorting
if not assume_unique:
ar1, rev_idx = np.unique(ar1, return_inverse=True)
ar2 = np.unique(ar2)
Expand Down
102 changes: 49 additions & 53 deletions numpy/lib/tests/test_arraysetops.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,66 +90,62 @@ def test_ediff1d(self):
assert_array_equal([1],ediff1d(two_elem))

def test_in1d(self):
a = np.array( [5, 7, 1, 2] )
b = np.array( [2, 4, 3, 1, 5] )

ec = np.array( [True, False, True, True] )
c = in1d( a, b, assume_unique=True )
assert_array_equal( c, ec )

a[0] = 8
ec = np.array( [False, False, True, True] )
c = in1d( a, b, assume_unique=True )
assert_array_equal( c, ec )

a[0], a[3] = 4, 8
ec = np.array( [True, False, True, False] )
c = in1d( a, b, assume_unique=True )
assert_array_equal( c, ec )

a = np.array([5,4,5,3,4,4,3,4,3,5,2,1,5,5])
b = [2,3,4]

ec = [False, True, False, True, True, True, True, True, True, False,
True, False, False, False]
c = in1d(a, b)
assert_array_equal(c, ec)

b = b + [5, 5, 4]

ec = [True, True, True, True, True, True, True, True, True, True,
True, False, True, True]
c = in1d(a, b)
assert_array_equal(c, ec)

a = np.array([5, 7, 1, 2])
b = np.array([2, 4, 3, 1, 5])

ec = np.array([True, False, True, True])
c = in1d(a, b)
assert_array_equal(c, ec)

a = np.array([5, 7, 1, 1, 2])
b = np.array([2, 4, 3, 3, 1, 5])

ec = np.array([True, False, True, True, True])
c = in1d(a, b)
assert_array_equal(c, ec)
# we use two different sizes for the b array here to test the
# two different paths in in1d().
for mult in (1, 10):
a = np.array([5, 7, 1, 2])
b = np.array([2, 4, 3, 1, 5] * mult)
ec = np.array([True, False, True, True])
c = in1d(a, b, assume_unique=True)
assert_array_equal(c, ec)

a[0] = 8
ec = np.array([False, False, True, True])
c = in1d(a, b, assume_unique=True)
assert_array_equal(c, ec)

a[0], a[3] = 4, 8
ec = np.array([True, False, True, False])
c = in1d(a, b, assume_unique=True)
assert_array_equal(c, ec)

a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
b = [2, 3, 4] * mult
ec = [False, True, False, True, True, True, True, True, True, False,
True, False, False, False]
c = in1d(a, b)
assert_array_equal(c, ec)

b = b + [5, 5, 4] * mult
ec = [True, True, True, True, True, True, True, True, True, True,
True, False, True, True]
c = in1d(a, b)
assert_array_equal(c, ec)

a = np.array([5, 7, 1, 2])
b = np.array([2, 4, 3, 1, 5] * mult)
ec = np.array([True, False, True, True])
c = in1d(a, b)
assert_array_equal(c, ec)

a = np.array([5, 7, 1, 1, 2])
b = np.array([2, 4, 3, 3, 1, 5] * mult)
ec = np.array([True, False, True, True, True])
c = in1d(a, b)
assert_array_equal(c, ec)

a = np.array([5, 5])
b = np.array([2, 2] * mult)
ec = np.array([False, False])
c = in1d(a, b)
assert_array_equal(c, ec)

a = np.array([5])
b = np.array([2])

ec = np.array([False])
c = in1d(a, b)
assert_array_equal(c, ec)

a = np.array([5, 5])
b = np.array([2, 2])

ec = np.array([False, False])
c = in1d(a, b)
assert_array_equal(c, ec)

assert_array_equal(in1d([], []), [])

def test_in1d_char_array( self ):
Expand Down

0 comments on commit 6441c2a

Please sign in to comment.