In [1]:
from filterbox import FrozenFilterBox, FilterBox
from litebox import LiteBox
from BTrees.OOBTree import OOBTree
import random

In [21]:
%%time
# let's try regular filterbox
objs = [{'x': i} for i in range(10**6)]
fb = FilterBox(objs, 'x')

CPU times: user 1.26 s, sys: 66.6 ms, total: 1.33 s
Wall time: 1.33 s


In [22]:
%%time
ffb = FrozenFilterBox(objs, 'x')

CPU times: user 628 ms, sys: 6.67 ms, total: 635 ms
Wall time: 634 ms


In [23]:
%%time
lb = LiteBox(objs, {'x': int})

CPU times: user 1.94 s, sys: 46.6 ms, total: 1.98 s
Wall time: 1.98 s


In [5]:
# test small queries

In [24]:
%%timeit
fb.find({'x': {'<': 10**5, '>': 10**5-10}})

4.7 µs ± 44.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [6]:
%%timeit
ffb.find({'x': {'<': 10**5, '>': 10**5-10}})

4.77 µs ± 51.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [7]:
%%timeit
lb.find(f'x < {10**5} and x > {10**5-10}')

3.59 µs ± 31.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [8]:
# test medium queries

In [28]:
%%timeit
fb.find({'x': {'<': 10**5, '>': 10**5-100}})

18.3 µs ± 169 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [26]:
%%timeit
ffb.find({'x': {'<': 10**5, '>': 10**5-100}})

5.17 µs ± 35.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [27]:
%%timeit
lb.find(f'x < {10**5} and x > {10**5-100}')

24.4 µs ± 244 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [11]:
# yes for long queries though.

In [29]:
%%timeit
fb.find({'x': {'<': 10**5, '>': 10**5-1000}})

162 µs ± 1.03 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [12]:
%%timeit
ffb.find({'x': {'<': 10**5, '>': 10**5-1000}})

10.1 µs ± 131 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [13]:
%%timeit
lb.find(f'x < {10**5} and x > {10**5-1000}')

223 µs ± 3.09 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [14]:
%%time
bt = OOBTree()
for s in range(10**6):
    bt[s] = s
# ??????????

CPU times: user 199 ms, sys: 16.6 ms, total: 215 ms
Wall time: 215 ms


In [15]:
nums = list(range(10**6))

In [16]:
%%time
bt = OOBTree()
for _ in range(10**6):
    random.choice(nums)
    bt[s] = s
# ??????????

CPU times: user 303 ms, sys: 2 µs, total: 303 ms
Wall time: 303 ms


In [17]:
%%time
# now let's try regular filterbox
objs = [{'x': i} for i in range(1000000)]
# WTF!!! why so slow omg
# maybe we still need fast init or something, dang...
fb = FilterBox(objs, 'x')

CPU times: user 1.14 s, sys: 80 ms, total: 1.22 s
Wall time: 1.22 s


In [18]:
%%timeit
fb.find({'x': {'<': 10**5, '>': 10**5-100}})

18.6 µs ± 73.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [19]:
%%timeit
lb.find(f'x < {10**5} and x > {10**5-100}')

24.1 µs ± 298 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [20]:
_="""
OK, so BTrees is comparable to LiteBox on range queries.
It'd be faster (albeit less general) if the objects were hashable and we could do set ops on them driectly but that's OK.
Why not store the objs in the tree, and only deal in IDs if there's a combination to do?
So it'd be a BTree of key -> list[obj]. Worth considering.
"""