# Import numpy and check version

In [1]:
import numpy as np
import timeit

In [2]:
# Report the installed NumPy release; the `kind` argument used by the in1d
# benchmarks further down is only available from 1.24.0 onwards
np.version.version

'1.24.0'

# Prepare input datasets and check outputs

In [3]:
# Fix the global NumPy seed so every run draws the same random inputs
np.random.seed(12)

# Two arrays of 50,000 random integers drawn from {0, 1, 2, 3, 4}
# (generated one after the other, so the RNG stream is consumed in order)
random_int1, random_int2 = (
    np.random.randint(0, 5, size=50_000, dtype=int) for _ in range(2)
)

# Two arrays of 50,000 random booleans
random_bool1, random_bool2 = (
    np.random.choice([True, False], size=50_000) for _ in range(2)
)

# Integer inputs for in1d (which isin also relies on):
# main_arr holds 0..49999, comp_arr holds only the even values 0..49998
main_arr = np.arange(50_000, dtype=int)
comp_arr = np.arange(0, 50_000, 2, dtype=int)

In [4]:
# Element-wise equality of the two random integer arrays (one boolean per position)
comp_int = np.equal(random_int1, random_int2)

In [5]:
# Preview the first 20 results as a sanity check
comp_int[:20]

array([False, False, False, False,  True,  True,  True, False, False,
       False,  True, False, False, False,  True,  True, False, False,
        True, False])

In [6]:
# Element-wise equality of the two random boolean arrays (one boolean per position)
comp_bool = np.equal(random_bool1, random_bool2)

In [7]:
# Preview the first 20 results as a sanity check
comp_bool[:20]

array([False,  True,  True,  True, False, False, False, False, False,
        True, False,  True,  True,  True,  True,  True,  True, False,
        True, False])

In [8]:
# Membership test: for every element of main_arr, is it present in comp_arr?
# kind='sort' explicitly selects the sort-based algorithm
comp_in1d = np.in1d(main_arr, comp_arr, kind='sort')

In [9]:
# Preview the first 20 results; comp_arr holds only even numbers, so the
# expected pattern alternates True/False
comp_in1d[:20]

array([ True, False,  True, False,  True, False,  True, False,  True,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False])

## Timeit setup

In [10]:
# Setup code handed to timeit: it is executed once before the timed statement
# and is not included in the measurement. It rebuilds the same input arrays as
# the data-preparation cell, because timeit runs code in its own namespace and
# cannot see the notebook's variables unless `globals` is passed.
setup = '''
import numpy as np
# set numpy seed for consistency
np.random.seed(12)

# 50000 integers between 0 and 4
random_int1 = np.random.randint(low=0, high=5, size=50000, dtype=int)
random_int2 = np.random.randint(low=0, high=5, size=50000, dtype=int)

# 50000 booleans
random_bool1 = np.random.choice([True,False], size=50000)
random_bool2 = np.random.choice([True,False], size=50000)

# integer arrays for in1d to compare (in1d is also utilised by isin)
main_arr = np.arange(start=0, stop=50000, step=1, dtype=int)
comp_arr = np.arange(start=0, stop=50000, step=2, dtype=int)
'''

In [11]:
# Timed statement: np.equal on the two integer arrays built by the setup code
equal_func_int = '''
comp = np.equal(random_int1, random_int2)
'''

In [12]:
# Timed statement: np.equal on the two boolean arrays built by the setup code
equal_func_bool = '''
comp = np.equal(random_bool1, random_bool2)
'''

In [13]:
# Timed statement: in1d membership test forced onto the sort-based algorithm
in1d_func_sort = '''
comp_in1d = np.in1d(main_arr, comp_arr, kind='sort')
'''

In [14]:
# Timed statement: in1d membership test forced onto the table-based algorithm
in1d_func_table = '''
comp_in1d = np.in1d(main_arr, comp_arr, kind='table')
'''

## Run timeit for different methods

#### Two integer arrays with length 50000 using method np.equal (method run 1 million times)

In [15]:
# Total wall-clock seconds for 1,000,000 executions of the integer np.equal statement
timeit.timeit(equal_func_int, setup, number=1_000_000)

7.04463380004745

#### Two boolean arrays with length 50000 using method np.equal (method run 1 million times)

In [16]:
# Total wall-clock seconds for 1,000,000 executions of the boolean np.equal statement
timeit.timeit(equal_func_bool, setup, number=1_000_000)

2.9182767999591306

#### Two integer arrays compared using in1d using the "sort" method (method run 10 thousand times)

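Note: this is equivalent to running in1d on numpy 1.23.5 and earlier, which only implemented what is now called the "sort" method. This benchmark runs 10,000 iterations rather than 1,000,000, so its total time is not directly comparable with the np.equal totals above unless both are normalised to time per call.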

In [17]:
# Total wall-clock seconds for 10,000 executions of in1d with kind='sort'
timeit.timeit(in1d_func_sort, setup, number=10_000)

18.4741442000377

#### Two integer arrays compared using in1d using the new "table" method (method run 10 thousand times)

Note: the "table" method was introduced in numpy 1.24.0. Earlier numpy releases have no "kind=" parameter, because the "sort" method was the only implementation available.

In [18]:
# Total wall-clock seconds for 10,000 executions of in1d with kind='table'
timeit.timeit(in1d_func_table, setup, number=10_000)

4.819625700009055