/ numpy Public

## Commit

ENH: speed up in1d() in the case of ar1 >> ar2. Closes #1603.
Browse the repository at this point in the history
```
A timing script justifying the switching criterion is attached to ticket 1603.

Thanks to Neil Crighton.
```
rgommers committed May 29, 2011
1 parent 3071eab commit 6441c2a
Showing 2 changed files with 59 additions and 55 deletions.
12 changes: 10 additions & 2 deletions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def setxor1d(ar1, ar2, assume_unique=False):

def in1d(ar1, ar2, assume_unique=False):
"""
Test whether each element of a 1D array is also present in a second array.
Test whether each element of a 1-D array is also present in a second array.
Returns a boolean array the same length as `ar1` that is True
where an element of `ar1` is in `ar2` and False otherwise.
Expand Down Expand Up @@ -305,7 +305,7 @@ def in1d(ar1, ar2, assume_unique=False):
Notes
-----
`in1d` can be considered as an element-wise function version of the
python keyword `in`, for 1D sequences. ``in1d(a, b)`` is roughly
python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
equivalent to ``np.array([item in b for item in a])``.
Expand All @@ -321,6 +321,14 @@ def in1d(ar1, ar2, assume_unique=False):
array([0, 2, 0])
"""
# This code is significantly faster when the condition is satisfied.
if len(ar2) < 10 * len(ar1) ** 0.145:
for a in ar2:

# Otherwise use sorting
if not assume_unique:
ar1, rev_idx = np.unique(ar1, return_inverse=True)
ar2 = np.unique(ar2)
Expand Down
102 changes: 49 additions & 53 deletions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Original file line number Diff line number Diff line change
Expand Up @@ -90,66 +90,62 @@ def test_ediff1d(self):
assert_array_equal([1],ediff1d(two_elem))

def test_in1d(self):
a = np.array( [5, 7, 1, 2] )
b = np.array( [2, 4, 3, 1, 5] )

ec = np.array( [True, False, True, True] )
c = in1d( a, b, assume_unique=True )
assert_array_equal( c, ec )

a[0] = 8
ec = np.array( [False, False, True, True] )
c = in1d( a, b, assume_unique=True )
assert_array_equal( c, ec )

a[0], a[3] = 4, 8
ec = np.array( [True, False, True, False] )
c = in1d( a, b, assume_unique=True )
assert_array_equal( c, ec )

a = np.array([5,4,5,3,4,4,3,4,3,5,2,1,5,5])
b = [2,3,4]

ec = [False, True, False, True, True, True, True, True, True, False,
True, False, False, False]
c = in1d(a, b)
assert_array_equal(c, ec)

b = b + [5, 5, 4]

ec = [True, True, True, True, True, True, True, True, True, True,
True, False, True, True]
c = in1d(a, b)
assert_array_equal(c, ec)

a = np.array([5, 7, 1, 2])
b = np.array([2, 4, 3, 1, 5])

ec = np.array([True, False, True, True])
c = in1d(a, b)
assert_array_equal(c, ec)

a = np.array([5, 7, 1, 1, 2])
b = np.array([2, 4, 3, 3, 1, 5])

ec = np.array([True, False, True, True, True])
c = in1d(a, b)
assert_array_equal(c, ec)
# we use two different sizes for the b array here to test the
# two different paths in in1d().
for mult in (1, 10):
a = np.array([5, 7, 1, 2])
b = np.array([2, 4, 3, 1, 5] * mult)
ec = np.array([True, False, True, True])
c = in1d(a, b, assume_unique=True)
assert_array_equal(c, ec)

a[0] = 8
ec = np.array([False, False, True, True])
c = in1d(a, b, assume_unique=True)
assert_array_equal(c, ec)

a[0], a[3] = 4, 8
ec = np.array([True, False, True, False])
c = in1d(a, b, assume_unique=True)
assert_array_equal(c, ec)

a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
b = [2, 3, 4] * mult
ec = [False, True, False, True, True, True, True, True, True, False,
True, False, False, False]
c = in1d(a, b)
assert_array_equal(c, ec)

b = b + [5, 5, 4] * mult
ec = [True, True, True, True, True, True, True, True, True, True,
True, False, True, True]
c = in1d(a, b)
assert_array_equal(c, ec)

a = np.array([5, 7, 1, 2])
b = np.array([2, 4, 3, 1, 5] * mult)
ec = np.array([True, False, True, True])
c = in1d(a, b)
assert_array_equal(c, ec)

a = np.array([5, 7, 1, 1, 2])
b = np.array([2, 4, 3, 3, 1, 5] * mult)
ec = np.array([True, False, True, True, True])
c = in1d(a, b)
assert_array_equal(c, ec)

a = np.array([5, 5])
b = np.array([2, 2] * mult)
ec = np.array([False, False])
c = in1d(a, b)
assert_array_equal(c, ec)

a = np.array([5])
b = np.array([2])

ec = np.array([False])
c = in1d(a, b)
assert_array_equal(c, ec)

a = np.array([5, 5])
b = np.array([2, 2])

ec = np.array([False, False])
c = in1d(a, b)
assert_array_equal(c, ec)

assert_array_equal(in1d([], []), [])

def test_in1d_char_array( self ):
Expand Down