In [6]:
import sys
sys.path.append('../module')
import biset
from bisetw import *
import numpy as np

In [7]:
# One explicitly seeded generator, reused by the fixture cells so the random data is reproducible.
RNG = np.random.default_rng(seed=42)

In [8]:
# 10,000 rows of 40 random values drawn from [0, 1000), stored as C ints; every row is a full-width key.
dense_ints_matrix = RNG.choice(1000, size=(10_000, 40)).astype(np.intc)
dense_ints_matrix

array([[ 89, 773, 654, ..., 827, 276, 631],
       [165, 758, 700, ..., 226, 564, 669],
       [940, 437, 160, ..., 139, 245, 114],
       ...,
       [229, 639, 203, ..., 848, 189, 681],
       [444, 358, 602, ..., 378, 429, 907],
       [461, 276, 873, ..., 109, 356,  42]],
      shape=(10000, 40), dtype=int32)

In [9]:
# Variable-length keys: row i keeps its first `ones_counts[i]` values (between 3 and 39)
# and every remaining column is overwritten with the filler value -1.
sparsed_ints_matrix = RNG.choice(1000, size=(10_000, 40)).astype(np.intc)
n_rows, n_cols = sparsed_ints_matrix.shape
sparsed_ints_matrix_ones_counts = 3 + RNG.choice(n_cols - 3, size=n_rows)

# Broadcast the column indices against the per-row lengths to select the tail of each row.
sparsed_ints_matrix[np.arange(n_cols) >= sparsed_ints_matrix_ones_counts[:, None]] = -1

sparsed_ints_matrix, sparsed_ints_matrix_ones_counts

(array([[754, 403, 902, ...,  -1,  -1,  -1],
        [528, 311, 725, ...,  -1,  -1,  -1],
        [889, 213, 560, ...,  -1,  -1,  -1],
        ...,
        [685, 410, 203, ...,  -1,  -1,  -1],
        [265, 333, 526, ...,  -1,  -1,  -1],
        [258, 702, 758, ...,  -1,  -1,  -1]],
       shape=(10000, 40), dtype=int32),
 array([10, 17, 18, ..., 35,  4, 27], shape=(10000,)))

In [10]:
# Smoke test: a fresh set is empty, and add() makes the key visible to len() and `in`.
first_key = dense_ints_matrix[0]

x = bisetw()
assert len(x) == 0
assert first_key not in x

x.add(first_key)
assert len(x) == 1
assert first_key in x

In [11]:
# Adding the same key twice must still leave exactly one stored entry.
x = bisetw()
for _ in range(2):
    x.add(dense_ints_matrix[0])

assert len(x) == 1
assert dense_ints_matrix[0] in x

In [12]:
# remove() must drop a stored key, and removing a key that is not stored must raise KeyError.
x = bisetw()
x.add(dense_ints_matrix[0])
x.remove(dense_ints_matrix[0])
assert len(x) == 0
assert dense_ints_matrix[0] not in x

try:
    x.remove(dense_ints_matrix[0])
except KeyError as e:
    # Show the error message so the reader can see how the missing key is reported.
    print(e)
else:
    # Without this branch the cell would pass silently when no exception is raised at all.
    raise AssertionError("remove() of a missing key did not raise KeyError")

'#0: [89, 773, 654, 438, 433, 858, 85, 697, 201, 94, 526, 975, 735, 761, 717, 786, 513, 128, 839, 450, 500, 370, 182, 926, 781, 643, 402, 822, 545, 443, 450, 227, 92, 554, 887, 63, 858, 827, 276, 631]'


In [13]:
# clear() must empty the set and leave it usable for further insertions.
probe_key = dense_ints_matrix[0]

x = bisetw()
x.add(probe_key)
x.clear()
assert len(x) == 0
assert probe_key not in x

x.add(probe_key)
assert len(x) == 1
assert probe_key in x

In [14]:
# After a bulk insert, every row of the matrix must be reported as a member.
x = bisetw()
x.add_many(dense_ints_matrix)

assert all(row in x for row in dense_ints_matrix)

In [15]:
# After a bulk insert followed by clear(), none of the rows may still be reported as members.
x = bisetw()
x.add_many(dense_ints_matrix)
assert len(x) > 0
x.clear()

assert not any(row in x for row in dense_ints_matrix)

In [16]:
# replace(old, new): replacing a key with itself changes nothing; replacing it with a
# different key swaps membership while the size stays at one.
key_a, key_b = dense_ints_matrix[0], dense_ints_matrix[1]

x = bisetw()
x.add(key_a)
assert len(x) == 1
assert key_a in x
assert key_b not in x

x.replace(key_a, key_a)
assert len(x) == 1
assert key_a in x
assert key_b not in x

x.replace(key_a, key_b)
assert len(x) == 1
assert key_a not in x
assert key_b in x

In [17]:
# Cross-check add_many() against a built-in set of tuples on input that contains repeated rows:
# the returned boolean map must flag exactly the first occurrence of each key.
inds = RNG.choice(dense_ints_matrix.shape[0], 100_000)
dense_ints_matrix_with_dups = dense_ints_matrix[inds]

x = bisetw()
x_is_added_boolmap = x.add_many(dense_ints_matrix_with_dups)
# Sanity check: the sample really contains duplicates, so some adds must be rejected.
assert np.count_nonzero(~x_is_added_boolmap) > 0

y = set()
y_is_added_boolmap = []

for row in dense_ints_matrix_with_dups:
    key = tuple(row)
    is_new = key not in y
    y.add(key)
    y_is_added_boolmap.append(is_new)

y_is_added_boolmap = np.array(y_is_added_boolmap)
assert (x_is_added_boolmap == y_is_added_boolmap).all()

In [18]:
# Cross-check remove_many() against a built-in set of tuples. The removal batch mixes the
# stored rows with freshly generated rows, shuffled together along the row axis.
dense_ints_matrix2 = RNG.choice(1000, size=(10_000, 40)).astype(np.intc)
ints_for_removal = np.concatenate([dense_ints_matrix, dense_ints_matrix2], axis=0)
RNG.shuffle(ints_for_removal, axis=0)

x = bisetw()
x.add_many(dense_ints_matrix)
x_is_removed_boolmap = x.remove_many(ints_for_removal)
# Sanity check: at least one requested removal must be reported as not performed.
assert np.count_nonzero(~x_is_removed_boolmap) > 0

y = {tuple(row) for row in dense_ints_matrix}
y_is_removed_boolmap = []

for row in ints_for_removal:
    key = tuple(row)
    was_present = key in y
    y.discard(key)
    y_is_removed_boolmap.append(was_present)

y_is_removed_boolmap = np.array(y_is_removed_boolmap)
assert (x_is_removed_boolmap == y_is_removed_boolmap).all()

In [19]:
# Same duplicate cross-check as the dense case, but with per-row key sizes: the reference
# key for each row is the tuple of its first `size` values.
inds = RNG.choice(sparsed_ints_matrix.shape[0], 100_000)
sparsed_ints_matrix_with_dups = sparsed_ints_matrix[inds]
sparsed_ints_matrix_ones_counts_with_dups = sparsed_ints_matrix_ones_counts[inds]

x = bisetw()
x_is_added_boolmap = x.add_many(sparsed_ints_matrix_with_dups, sparsed_ints_matrix_ones_counts_with_dups)
# Sanity check: the sample really contains duplicates, so some adds must be rejected.
assert np.count_nonzero(~x_is_added_boolmap) > 0

y = set()
y_is_added_boolmap = []

for row, size in zip(sparsed_ints_matrix_with_dups, sparsed_ints_matrix_ones_counts_with_dups):
    key = tuple(row[:size])
    is_new = key not in y
    y.add(key)
    y_is_added_boolmap.append(is_new)

y_is_added_boolmap = np.array(y_is_added_boolmap)
assert (x_is_added_boolmap == y_is_added_boolmap).all()

In [20]:
# Cross-check remove_many() with per-row key sizes against a built-in set of tuples.
# The second matrix draws its key sizes from the full range 0..39, so it may contain empty keys.
sparsed_ints_matrix2 = RNG.choice(1000, size=(10_000, 40)).astype(np.intc)
n_rows2, n_cols2 = sparsed_ints_matrix2.shape
sparsed_ints_matrix2_ones_counts = RNG.choice(n_cols2, size=n_rows2)

# Overwrite the unused tail of every row with the filler value -1.
sparsed_ints_matrix2[np.arange(n_cols2) >= sparsed_ints_matrix2_ones_counts[:, None]] = -1

# Stack stored and fresh rows, then apply one shared random order to the rows and their sizes.
combined_sparsed_ints_matrix_for_removal = np.concatenate([sparsed_ints_matrix, sparsed_ints_matrix2], axis=0)
combined_sparsed_ints_matrix_ones_counts_for_removal = np.concatenate([sparsed_ints_matrix_ones_counts, sparsed_ints_matrix2_ones_counts])
n_combined = combined_sparsed_ints_matrix_for_removal.shape[0]
ii = RNG.choice(n_combined, n_combined, replace=False)
combined_sparsed_ints_matrix_for_removal = combined_sparsed_ints_matrix_for_removal[ii]
combined_sparsed_ints_matrix_ones_counts_for_removal = combined_sparsed_ints_matrix_ones_counts_for_removal[ii]

x = bisetw()
x.add_many(sparsed_ints_matrix, sparsed_ints_matrix_ones_counts)
x_is_removed_boolmap = x.remove_many(combined_sparsed_ints_matrix_for_removal, combined_sparsed_ints_matrix_ones_counts_for_removal)
# Sanity check: at least one requested removal must be reported as not performed.
assert np.count_nonzero(~x_is_removed_boolmap) > 0

y = {tuple(row[:size]) for row, size in zip(sparsed_ints_matrix, sparsed_ints_matrix_ones_counts)}

y_is_removed_boolmap = []

for row, size in zip(combined_sparsed_ints_matrix_for_removal, combined_sparsed_ints_matrix_ones_counts_for_removal):
    key = tuple(row[:size])
    was_present = key in y
    y.discard(key)
    y_is_removed_boolmap.append(was_present)

y_is_removed_boolmap = np.array(y_is_removed_boolmap)
assert (x_is_removed_boolmap == y_is_removed_boolmap).all()

In [21]:
# replace_many() on full-width keys. Scenarios covered, in order:
#   1. replacing every key with itself,
#   2. replacing every key with a new key,
#   3. source keys that are not stored (expected to raise KeyError),
#   4. one target key already stored,
#   5. all target keys already stored.
ints_from = dense_ints_matrix[:100]
ints_to = dense_ints_matrix[-100:]

x = bisetw()
x.add_many(ints_from)

# 1. Self-replacement is reported as successful and the keys stay stored.
is_replaced_boolmap = x.replace_many(ints_from, ints_from)
assert np.all(is_replaced_boolmap)

for ints in ints_from:
    assert ints in x

# 2. Every source key is swapped for its target key.
is_replaced_boolmap = x.replace_many(ints_from, ints_to)
assert np.all(is_replaced_boolmap)

for ints in ints_from:
    assert ints not in x

for ints in ints_to:
    assert ints in x

# 3. The source keys (`ints_to`) are not stored here, so the call must raise KeyError.
x.clear()
x.add_many(ints_from)

try:
    x.replace_many(ints_to, ints_from)
except KeyError as e:
    # Show the error message so the reader can see how the missing key is reported.
    print(e)
else:
    # Without this branch the cell would pass silently when no exception is raised at all.
    raise AssertionError("replace_many() with missing source keys did not raise KeyError")

# 4. Only the first target key is already stored, so only that replacement is refused.
x.clear()
x.add_many(ints_from)
x.add(ints_to[0])
is_replaced_boolmap = x.replace_many(ints_from, ints_to)
assert not is_replaced_boolmap[0]
assert np.all(is_replaced_boolmap[1:])

# 5. Every target key is already stored, so no replacement is performed.
x.clear()
x.add_many(ints_from)
x.add_many(ints_to)
assert len(x) == ints_from.shape[0] + ints_to.shape[0]
is_replaced_boolmap = x.replace_many(ints_from, ints_to)
assert not np.any(is_replaced_boolmap)

'keyFrom #0: [646, 590, 64, 895, 201, 808, 357, 242, 944, 487, 383, 142, 826, 518, 106, 666, 85, 807, 619, 150, 276, 553, 544, 761, 724, 274, 195, 162, 501, 372, 378, 223, 82, 275, 531, 481, 44, 941, 567, 185]'


In [22]:
# replace_many() on variable-length keys (rows plus per-row key sizes). Same five scenarios
# as the dense case: self-replacement, full replacement, missing source keys (KeyError),
# one target already stored, all targets already stored.
ints_from = sparsed_ints_matrix[:100]
ints_from_ones_counts = sparsed_ints_matrix_ones_counts[:100]
ints_to = sparsed_ints_matrix[-100:]
ints_to_ones_counts = sparsed_ints_matrix_ones_counts[-100:]

x = bisetw()
x.add_many(ints_from, key_sizes=ints_from_ones_counts)

# 1. Self-replacement is reported as successful and the keys stay stored.
is_replaced_boolmap = x.replace_many(ints_from, ints_from, key_from_sizes=ints_from_ones_counts, key_to_sizes=ints_from_ones_counts)
assert np.all(is_replaced_boolmap)

for ints, ones_count in zip(ints_from, ints_from_ones_counts):
    assert ints[:ones_count] in x

# 2. Every source key is swapped for its target key.
is_replaced_boolmap = x.replace_many(ints_from, ints_to, key_from_sizes=ints_from_ones_counts, key_to_sizes=ints_to_ones_counts)
assert np.all(is_replaced_boolmap)

for ints, ones_count in zip(ints_from, ints_from_ones_counts):
    assert ints[:ones_count] not in x

for ints, ones_count in zip(ints_to, ints_to_ones_counts):
    assert ints[:ones_count] in x

# 3. The source keys (`ints_to`) are not stored here, so the call must raise KeyError.
x.clear()
x.add_many(ints_from, key_sizes=ints_from_ones_counts)

try:
    x.replace_many(ints_to, ints_from, key_from_sizes=ints_to_ones_counts, key_to_sizes=ints_from_ones_counts)
except KeyError as e:
    # Show the error message so the reader can see how the missing key is reported.
    print(e)
else:
    # Without this branch the cell would pass silently when no exception is raised at all.
    raise AssertionError("replace_many() with missing source keys did not raise KeyError")

# 4. Only the first target key is already stored, so only that replacement is refused.
x.clear()
x.add_many(ints_from, key_sizes=ints_from_ones_counts)
x.add(ints_to[0,:ints_to_ones_counts[0]])
is_replaced_boolmap = x.replace_many(ints_from, ints_to, key_from_sizes=ints_from_ones_counts, key_to_sizes=ints_to_ones_counts)
assert not is_replaced_boolmap[0]
assert np.all(is_replaced_boolmap[1:])

# 5. Every target key is already stored, so no replacement is performed.
x.clear()
x.add_many(ints_from, key_sizes=ints_from_ones_counts)
x.add_many(ints_to, key_sizes=ints_to_ones_counts)
assert len(x) == ints_from.shape[0] + ints_to.shape[0]
is_replaced_boolmap = x.replace_many(ints_from, ints_to, key_from_sizes=ints_from_ones_counts, key_to_sizes=ints_to_ones_counts)
assert not np.any(is_replaced_boolmap)

'keyFrom #0: [423, 98, 392, 347, 921, 648, 122, 104, 736, 355, 471, 619, 177, 78, 888, 934, 834, 792, 622, 324, 878, 978]'


In [23]:
%%timeit
# Baseline: cost of constructing an empty built-in set.
y = set()

61.2 ns ± 4.96 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


In [24]:
%%timeit
# Cost of constructing an empty bisetw instance, for comparison with the built-in set.
x = bisetw()

355 ns ± 7.81 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [25]:
%%timeit
# Create/destroy pair called through the `biset` module attributes.
x = biset.biset_create()
biset.biset_destroy(x)

153 ns ± 0.258 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


In [26]:
# Bind both functions to plain names; presumably so the next benchmark can time the calls
# without the `biset.` attribute lookup.
biset_create, biset_destroy = biset.biset_create, biset.biset_destroy

In [27]:
%%timeit
# Same create/destroy pair, called through the plain-name aliases instead of `biset.<name>`.
x = biset_create()
biset_destroy(x)

144 ns ± 0.126 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


In [28]:
# Instance reused by the bisetw %%timeit cells, created here so construction is not timed.
x = bisetw()

In [29]:
%%timeit
# Time emptying the set and bulk-inserting every row of the dense matrix.
x.clear()
x.add_many(dense_ints_matrix)

2.97 ms ± 4.4 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [30]:
# Built-in set reused by the pure-Python %%timeit cells, created here so construction is not timed.
y = set()

In [31]:
%%timeit
# Pure-Python reference for the dense bulk insert: one tuple per row stored in a built-in set,
# recording for each row whether it was newly added.
y.clear()
is_added = []

for ints in dense_ints_matrix:
    e = tuple(ints)  

    if not e in y:
        y.add(e)
        is_added.append(True)
    else:
        is_added.append(False)

27.5 ms ± 147 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [32]:
%%timeit
# Time emptying the set and bulk-inserting the variable-length keys (rows plus per-row sizes).
x.clear()
x.add_many(sparsed_ints_matrix, sparsed_ints_matrix_ones_counts)

2.82 ms ± 16 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [33]:
%%timeit
# Pure-Python reference for the variable-length bulk insert: each key is the tuple of the
# first `ones_count` values of its row, stored in a built-in set.
y.clear()
is_added = []

for ints, ones_count in zip(sparsed_ints_matrix, sparsed_ints_matrix_ones_counts):
    e = tuple(ints[:ones_count])  

    if not e in y:
        y.add(e)
        is_added.append(True)
    else:
        is_added.append(False)

21.5 ms ± 161 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
