In [1]:
from filterbox import FrozenFilterBox, FilterBox
from litebox import LiteBox
from BTrees.OOBTree import OOBTree
import random

In [5]:
%%time
# let's try regular filterbox
objs = [{'x': i} for i in range(10**6)]
fb = FilterBox(objs, 'x')

CPU times: user 917 ms, sys: 86.7 ms, total: 1 s
Wall time: 1 s


In [2]:
objs = [{'x': i} for i in range(10**6)]

In [3]:
ffb = FrozenFilterBox(objs, 'x')

In [4]:
lb = LiteBox(objs, {'x': int})

In [5]:
%%timeit
ffb.find({'x': {'<': 10**5, '>': 10**5-10}})

3.83 µs ± 75.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [6]:
%%timeit
lb.find(f'x < {10**5} and x > {10**5-10}')

3.6 µs ± 31.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [7]:
# Not for short queries

In [8]:
%%timeit
ffb.find({'x': {'<': 10**5, '>': 10**5-100}})

4.22 µs ± 75.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [9]:
%%timeit
lb.find(f'x < {10**5} and x > {10**5-100}')

24.7 µs ± 123 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [10]:
# yes for long queries though.

In [11]:
%%timeit
ffb.find({'x': {'<': 10**5, '>': 10**5-1000}})

9.09 µs ± 114 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [12]:
%%timeit
lb.find(f'x < {10**5} and x > {10**5-1000}')

242 µs ± 13.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [13]:
%%time
bt = OOBTree()
for s in range(10**6):
    bt[s] = s
# ??????????

CPU times: user 209 ms, sys: 13.3 ms, total: 222 ms
Wall time: 221 ms


In [14]:
nums = list(range(10**6))

In [15]:
%%time
bt = OOBTree()
for _ in range(10**6):
    random.choice(nums)
    bt[s] = s
# ??????????

CPU times: user 375 ms, sys: 0 ns, total: 375 ms
Wall time: 375 ms


In [16]:
%%time
# now let's try regular filterbox
objs = [{'x': i} for i in range(1000000)]
# WTF!!! why so slow omg
# maybe we still need fast init or something, dang...
fb = FilterBox(objs, 'x')

CPU times: user 3min 20s, sys: 92.6 ms, total: 3min 20s
Wall time: 3min 20s


In [17]:
%%timeit
fb.find({'x': {'<': 10**5, '>': 10**5-100}})

22.8 µs ± 89.7 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [18]:
%%timeit
lb.find(f'x < {10**5} and x > {10**5-100}')

24.4 µs ± 284 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [19]:
_="""
OK, so BTrees is comparable to LiteBox on range queries.
It'd be faster (albeit less general) if the objects were hashable and we could do set ops on them driectly but that's OK.
Why not store the objs in the tree, and only deal in IDs if there's a combination to do?
So it'd be a BTree of key -> list[obj]. Worth considering.
"""