In [18]:
from filterbox import FrozenFilterBox, FilterBox
from litebox import LiteBox
from BTrees.OOBTree import OOBTree
import random

In [2]:
# Benchmark fixture: one million dicts, each holding a single integer field 'x' (0 .. 10**6 - 1).
objs = [{'x': value} for value in range(1_000_000)]

In [3]:
# Build a FrozenFilterBox over `objs`; the second argument 'x' presumably names the
# attribute to index (it is the key queried via ffb.find later) — confirm against filterbox docs.
ffb = FrozenFilterBox(objs, 'x')

In [30]:
# Build a LiteBox over `objs`, passing {'x': int} — presumably the field to index
# and its type; confirm against litebox docs.
lb = LiteBox(objs, {'x': int})

In [6]:
%%timeit
# FrozenFilterBox query on 'x' with '<' 10**5 and '>' 10**5-10 (bounds 10 apart).
ffb.find({'x': {'<': 10**5, '>': 10**5-10}})

8.47 µs ± 83.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [8]:
%%timeit
# LiteBox query string for 10**5-10 < x < 10**5 (bounds 10 apart);
# the f-string formatting is part of the timed statement.
lb.find(f'x < {10**5} and x > {10**5-10}')

6.98 µs ± 83 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [9]:
# FrozenFilterBox is not faster than LiteBox on short range queries (~10 matches).

In [10]:
%%timeit
# FrozenFilterBox query on 'x' with '<' 10**5 and '>' 10**5-100 (bounds 100 apart).
ffb.find({'x': {'<': 10**5, '>': 10**5-100}})

9.3 µs ± 123 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [11]:
%%timeit
# LiteBox query string for 10**5-100 < x < 10**5 (bounds 100 apart);
# the f-string formatting is part of the timed statement.
lb.find(f'x < {10**5} and x > {10**5-100}')

47.1 µs ± 415 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [12]:
# FrozenFilterBox is faster than LiteBox on longer range queries, though (~100 matches).

In [15]:
%%timeit
# FrozenFilterBox query on 'x' with '<' 10**5 and '>' 10**5-1000 (bounds 1000 apart).
ffb.find({'x': {'<': 10**5, '>': 10**5-1000}})

20.9 µs ± 1.29 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [16]:
%%timeit
# LiteBox query string for 10**5-1000 < x < 10**5 (bounds 1000 apart);
# the f-string formatting is part of the timed statement.
lb.find(f'x < {10**5} and x > {10**5-1000}')

444 µs ± 11.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [16]:
%%time
# Time building an OOBTree by inserting 10**6 integer keys in ascending order (value == key).
bt = OOBTree()
for s in range(10**6):
    bt[s] = s
# `s` remains bound to 10**6 - 1 after this loop.
# NOTE(review): the original author flagged this timing with '??????????' —
# presumably surprise at the result; intent unconfirmed.

CPU times: user 494 ms, sys: 6.64 ms, total: 501 ms
Wall time: 499 ms


In [20]:
# Pool of candidate integer keys 0 .. 10**6 - 1, materialised as a list for random.choice.
nums = [*range(1_000_000)]

In [24]:
%%time
bt = OOBTree()
for _ in range(10**6):
    random.choice(nums)
    bt[s] = s
# ??????????

CPU times: user 661 ms, sys: 11 µs, total: 661 ms
Wall time: 661 ms


In [9]:
%%time
# Build a regular (non-frozen) FilterBox over 10**6 objects.
# `objs` is rebuilt here, so list construction is included in the timing.
objs = [{'x': i} for i in range(1000000)]
# NOTE: FilterBox construction is unexpectedly slow at this size (see the
# recorded timing for this cell); a faster init path may still be needed.
fb = FilterBox(objs, 'x')

CPU times: user 3min 20s, sys: 161 ms, total: 3min 20s
Wall time: 3min 20s


In [34]:
%%timeit
# Regular FilterBox query on 'x' with '<' 10**5 and '>' 10**5-100 (bounds 100 apart).
fb.find({'x': {'<': 10**5, '>': 10**5-100}})

51.6 µs ± 2.79 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [35]:
%%timeit
# LiteBox query string for 10**5-100 < x < 10**5 (bounds 100 apart), timed for
# comparison with the regular FilterBox; the f-string formatting is part of the timed statement.
lb.find(f'x < {10**5} and x > {10**5-100}')

47.8 µs ± 447 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [36]:
_="""
OK, so BTrees is comparable to LiteBox on range queries.
It'd be faster (albeit less general) if the objects were hashable and we could do set ops on them directly, but that's OK.
Why not store the objs in the tree, and only deal in IDs if there's a combination to do?
So it'd be a BTree of key -> list[obj]. Worth considering.
"""