Skip to content

Commit 6441c2a

Browse files
author
rgommers
committed
ENH: speed up in1d() when ar1 is much larger than ar2 (len(ar1) >> len(ar2)). Closes #1603.
A timing script justifying the switching criterion is attached to ticket 1603. Thanks to Neil Crighton.
1 parent 3071eab commit 6441c2a

File tree

2 files changed

+59
-55
lines changed

2 files changed

+59
-55
lines changed

numpy/lib/arraysetops.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ def setxor1d(ar1, ar2, assume_unique=False):
277277

278278
def in1d(ar1, ar2, assume_unique=False):
279279
"""
280-
Test whether each element of a 1D array is also present in a second array.
280+
Test whether each element of a 1-D array is also present in a second array.
281281
282282
Returns a boolean array the same length as `ar1` that is True
283283
where an element of `ar1` is in `ar2` and False otherwise.
@@ -305,7 +305,7 @@ def in1d(ar1, ar2, assume_unique=False):
305305
Notes
306306
-----
307307
`in1d` can be considered as an element-wise function version of the
308-
python keyword `in`, for 1D sequences. ``in1d(a, b)`` is roughly
308+
Python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
309309
equivalent to ``np.array([item in b for item in a])``.
310310
311311
.. versionadded:: 1.4.0
@@ -321,6 +321,14 @@ def in1d(ar1, ar2, assume_unique=False):
321321
array([0, 2, 0])
322322
323323
"""
324+
# When ar2 is very small compared to ar1, looping over ar2 and OR-ing the equality masks is significantly faster than the sort-based approach below.
325+
if len(ar2) < 10 * len(ar1) ** 0.145:
326+
mask = np.zeros(len(ar1), dtype=np.bool)
327+
for a in ar2:
328+
mask |= (ar1 == a)
329+
return mask
330+
331+
# Otherwise use sorting
324332
if not assume_unique:
325333
ar1, rev_idx = np.unique(ar1, return_inverse=True)
326334
ar2 = np.unique(ar2)

numpy/lib/tests/test_arraysetops.py

Lines changed: 49 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -90,66 +90,62 @@ def test_ediff1d(self):
9090
assert_array_equal([1],ediff1d(two_elem))
9191

9292
def test_in1d(self):
93-
a = np.array( [5, 7, 1, 2] )
94-
b = np.array( [2, 4, 3, 1, 5] )
95-
96-
ec = np.array( [True, False, True, True] )
97-
c = in1d( a, b, assume_unique=True )
98-
assert_array_equal( c, ec )
99-
100-
a[0] = 8
101-
ec = np.array( [False, False, True, True] )
102-
c = in1d( a, b, assume_unique=True )
103-
assert_array_equal( c, ec )
104-
105-
a[0], a[3] = 4, 8
106-
ec = np.array( [True, False, True, False] )
107-
c = in1d( a, b, assume_unique=True )
108-
assert_array_equal( c, ec )
109-
110-
a = np.array([5,4,5,3,4,4,3,4,3,5,2,1,5,5])
111-
b = [2,3,4]
112-
113-
ec = [False, True, False, True, True, True, True, True, True, False,
114-
True, False, False, False]
115-
c = in1d(a, b)
116-
assert_array_equal(c, ec)
117-
118-
b = b + [5, 5, 4]
119-
120-
ec = [True, True, True, True, True, True, True, True, True, True,
121-
True, False, True, True]
122-
c = in1d(a, b)
123-
assert_array_equal(c, ec)
124-
125-
a = np.array([5, 7, 1, 2])
126-
b = np.array([2, 4, 3, 1, 5])
127-
128-
ec = np.array([True, False, True, True])
129-
c = in1d(a, b)
130-
assert_array_equal(c, ec)
131-
132-
a = np.array([5, 7, 1, 1, 2])
133-
b = np.array([2, 4, 3, 3, 1, 5])
134-
135-
ec = np.array([True, False, True, True, True])
136-
c = in1d(a, b)
137-
assert_array_equal(c, ec)
93+
# we use two different sizes for the b array here to test the
94+
# two different paths in in1d().
95+
for mult in (1, 10):
96+
a = np.array([5, 7, 1, 2])
97+
b = np.array([2, 4, 3, 1, 5] * mult)
98+
ec = np.array([True, False, True, True])
99+
c = in1d(a, b, assume_unique=True)
100+
assert_array_equal(c, ec)
101+
102+
a[0] = 8
103+
ec = np.array([False, False, True, True])
104+
c = in1d(a, b, assume_unique=True)
105+
assert_array_equal(c, ec)
106+
107+
a[0], a[3] = 4, 8
108+
ec = np.array([True, False, True, False])
109+
c = in1d(a, b, assume_unique=True)
110+
assert_array_equal(c, ec)
111+
112+
a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
113+
b = [2, 3, 4] * mult
114+
ec = [False, True, False, True, True, True, True, True, True, False,
115+
True, False, False, False]
116+
c = in1d(a, b)
117+
assert_array_equal(c, ec)
118+
119+
b = b + [5, 5, 4] * mult
120+
ec = [True, True, True, True, True, True, True, True, True, True,
121+
True, False, True, True]
122+
c = in1d(a, b)
123+
assert_array_equal(c, ec)
124+
125+
a = np.array([5, 7, 1, 2])
126+
b = np.array([2, 4, 3, 1, 5] * mult)
127+
ec = np.array([True, False, True, True])
128+
c = in1d(a, b)
129+
assert_array_equal(c, ec)
130+
131+
a = np.array([5, 7, 1, 1, 2])
132+
b = np.array([2, 4, 3, 3, 1, 5] * mult)
133+
ec = np.array([True, False, True, True, True])
134+
c = in1d(a, b)
135+
assert_array_equal(c, ec)
136+
137+
a = np.array([5, 5])
138+
b = np.array([2, 2] * mult)
139+
ec = np.array([False, False])
140+
c = in1d(a, b)
141+
assert_array_equal(c, ec)
138142

139143
a = np.array([5])
140144
b = np.array([2])
141-
142145
ec = np.array([False])
143146
c = in1d(a, b)
144147
assert_array_equal(c, ec)
145148

146-
a = np.array([5, 5])
147-
b = np.array([2, 2])
148-
149-
ec = np.array([False, False])
150-
c = in1d(a, b)
151-
assert_array_equal(c, ec)
152-
153149
assert_array_equal(in1d([], []), [])
154150

155151
def test_in1d_char_array( self ):

0 commit comments

Comments
 (0)