## Benchmarks
 - Range query, 1 attribute
 - Range query, 2 attributes
 - Exact-match query, 1 attribute
 - 'in' query, 1 attribute
 

In [1]:
from dexer import Dexer, FrozenDexer
import time
import pandas as pd
from timeit import timeit, repeat
from litebox import LiteBox
from statistics import stdev, mean

In [178]:
# Synthetic data set: one million cookie dicts whose attributes cycle at
# different rates, so queries can target attributes of differing cardinality.
FLAVORS = ['Peanut', 'Chocolate', 'Macadamia', 'Almond', 'Cinnamon',
           'Butter', 'Caramel', 'Fudge', 'Candy', 'Mystery']
COLORS = ['Red', 'Orange', 'Yellow', 'Green', 'Blue',
          'Purple', 'Rainbow', 'Black', 'White', 'Invisible']
# Four-digit, zero-padded tag strings '0000' .. '9999'.
TAGS = [str(i).zfill(4) for i in range(10**4)]
cookies = []
for i in range(10**6):
    cookie = {
        'num': i,                             # running index of the cookie
        'size': i % 10**6,                    # 10**6 distinct values
        'chips': i % 10**5,                   # 10**5 distinct values
        'sugar': i % 10**4,                   # 10**4 distinct values
        'flavor': FLAVORS[i % 10],            # changes on every cookie
        'tag': TAGS[(i // 10) % len(TAGS)],   # changes every 10 cookies
        'color': COLORS[(i // 100) % 10],     # changes every 100 cookies
    }
    cookies.append(cookie)


print(len(cookies))

1000000


In [179]:
%%time
# Load the cookie dicts into a pandas DataFrame, used by the find_*_df benchmarks.
df = pd.DataFrame(cookies)

CPU times: user 1.03 s, sys: 23.3 ms, total: 1.05 s
Wall time: 1.05 s


In [180]:
%%time
# Build the LiteBox over the cookies, declaring a type for each attribute.
lb = LiteBox(
    cookies,
    {
        'num': int,
        'size': int,
        'chips': int,
        'sugar': int,
        'flavor': str,
        'tag': str,
        'color': str,
    },
)

CPU times: user 6.67 s, sys: 113 ms, total: 6.78 s
Wall time: 6.79 s


In [181]:
%%time
# Build the Dexer on all seven cookie attributes. Each name must be its own
# list element: without the comma, 'sugar' 'flavor' fuses into 'sugarflavor'.
dex = Dexer(cookies, on=['num', 'size', 'chips', 'sugar', 'flavor', 'tag', 'color'])

CPU times: user 9.47 s, sys: 52.8 ms, total: 9.52 s
Wall time: 9.53 s


In [182]:
%%time
# Build the FrozenDexer on all seven cookie attributes. Each name must be its
# own list element: without the comma, 'sugar' 'flavor' fuses into 'sugarflavor'.
fdex = FrozenDexer(cookies, on=['num', 'size', 'chips', 'sugar', 'flavor', 'tag', 'color'])

CPU times: user 6.14 s, sys: 32.9 ms, total: 6.17 s
Wall time: 6.17 s


In [171]:
# One-attribute range query returning 100 results

def find_range1_df():
    """Select the rows of ``df`` whose ``size`` is below 100."""
    is_small = df['size'] < 100
    return df[is_small]

def find_range1_lc():
    """Scan ``cookies`` with a list comprehension, keeping ``size`` below 100."""
    return [
        cookie
        for cookie in cookies
        if cookie['size'] < 100
    ]

def find_range1_lb():
    """Run the ``size < 100`` query against ``lb``."""
    where = 'size < 100'
    return lb.find(where)

def find_range1_dex():
    """Query ``dex`` for objects whose ``size`` is below 100."""
    query = {'size': {'<': 100}}
    return dex.find(query)

def find_range1_fdex():
    """Query ``fdex`` for objects whose ``size`` is below 100."""
    size_limit = {'<': 100}
    return fdex.find({'size': size_limit})

# All implementations of the one-attribute range query.
RANGE1 = [
    find_range1_df,
    find_range1_lc,
    find_range1_lb,
    find_range1_dex,
    find_range1_fdex,
]

In [172]:
# Sanity check: print how many results each implementation returns.
for f in RANGE1:
    found = f()
    print(len(found))


100
100
100
100
100


In [173]:
# Two-attribute range query returning 100 results
def find_range2_df():
    """Select the rows of ``df`` with ``size`` below 1000 and ``chips`` below 100.

    The bounds match the other ``find_range2_*`` implementations so that all
    of them evaluate the same query.
    """
    return df[(df['size'] < 1000) & (df['chips'] < 100)]

def find_range2_lc():
    """Scan ``cookies`` keeping those with ``size`` < 1000 and ``chips`` < 100."""
    return [
        cookie
        for cookie in cookies
        if cookie['size'] < 1000 and cookie['chips'] < 100
    ]

def find_range2_lb():
    """Run the ``size < 1000 and chips < 100`` query against ``lb``."""
    where = 'size < 1000 and chips < 100'
    return lb.find(where)

def find_range2_dex():
    """Query ``dex`` for objects with ``size`` < 1000 and ``chips`` < 100."""
    query = {'size': {'<': 1000}, 'chips': {'<': 100}}
    return dex.find(query)

def find_range2_fdex():
    """Query ``fdex`` for objects with ``size`` < 1000 and ``chips`` < 100."""
    size_limit = {'<': 1000}
    chips_limit = {'<': 100}
    return fdex.find({'size': size_limit, 'chips': chips_limit})

# All implementations of the two-attribute range query.
RANGE2 = [
    find_range2_df,
    find_range2_lc,
    find_range2_lb,
    find_range2_dex,
    find_range2_fdex,
]

In [174]:
# Sanity check: print how many results each implementation returns.
for f in RANGE2:
    found = f()
    print(len(found))


100
100
100
100
100


In [187]:
# One-attribute exact match query returning 100 results
def find_eq1_df():
    """Select the rows of ``df`` whose ``tag`` equals ``'1111'``."""
    tag_matches = df['tag'] == '1111'
    return df[tag_matches]

def find_eq1_lc():
    """Scan ``cookies`` keeping those whose ``tag`` equals ``'1111'``."""
    return [
        cookie
        for cookie in cookies
        if cookie['tag'] == '1111'
    ]

def find_eq1_lb():
    """Run the ``tag == "1111"`` query against ``lb``."""
    where = 'tag == "1111"'
    return lb.find(where)

def find_eq1_dex():
    """Query ``dex`` for objects whose ``tag`` is ``'1111'``."""
    query = {'tag': '1111'}
    return dex.find(query)

def find_eq1_fdex():
    """Query ``fdex`` for objects whose ``tag`` is ``'1111'``."""
    wanted = dict()
    wanted['tag'] = '1111'
    return fdex.find(wanted)

# All implementations of the one-attribute exact-match query.
EQ1 = [
    find_eq1_df,
    find_eq1_lc,
    find_eq1_lb,
    find_eq1_dex,
    find_eq1_fdex,
]

In [148]:
def run_timings(f, result_len, n_times=10, n_repeat=5):
    """Return the best observed time per call of ``f``, in seconds.

    ``f`` is first called once to check that it returns ``result_len`` items.
    It is then timed with ``timeit.repeat``: ``n_repeat`` rounds of
    ``n_times`` calls each. The fastest round is divided by ``n_times``.

    Args:
        f: Zero-argument callable whose result supports ``len()``.
        result_len: Number of items ``f()`` is expected to return.
        n_times: Calls per timing round.
        n_repeat: Number of timing rounds (5 is ``timeit.repeat``'s default).

    Raises:
        AssertionError: If ``len(f())`` differs from ``result_len``.
    """
    n_found = len(f())
    assert n_found == result_len, f'expected {result_len} results, got {n_found}'
    round_totals = repeat(f, number=n_times, repeat=n_repeat)
    # The minimum is the round least disturbed by other activity on the machine.
    return min(round_totals) / n_times


In [188]:
# NOTE(review): n_repeat is assigned here but not passed to run_timings below.
n_repeat = 100
# Maps each benchmark function's name to the value returned by run_timings.
results = {}
for f in [*RANGE1, *RANGE2, *EQ1]:
    label = f.__name__
    print(label)
    results[label] = run_timings(f, 100)

find_range1_df
find_range1_lc
find_range1_lb
find_range1_dex
find_range1_fdex
find_range2_df
find_range2_lc
find_range2_lb
find_range2_dex
find_range2_fdex
find_eq1_df
find_eq1_lc
find_eq1_lb
find_eq1_dex
find_eq1_fdex


In [189]:
# Print one "name time" line per benchmark.
for r, t in results.items():
    print(f'{r} {t}')

find_range1_df 0.0009101113071665167
find_range1_lc 0.051943747501354665
find_range1_lb 5.443879636004567e-05
find_range1_dex 4.8754794988781214e-05
find_range1_fdex 1.6833702102303504e-05
find_range2_df 0.0022733806050382554
find_range2_lc 0.0498123213998042
find_range2_lb 0.0003733630990609527
find_range2_dex 0.0005058821989223361
find_range2_fdex 6.0588796623051165e-05
find_eq1_df 0.04267703660298139
find_eq1_lc 0.04795632830355316
find_eq1_lb 5.073370411992073e-05
find_eq1_dex 1.3539998326450586e-05
find_eq1_fdex 1.1276802979409695e-05


In [None]:
# One-attribute 'in' query, 1000 results
# The ten tags '1000' .. '1009', as four-digit strings.
match_tags = [f'{t:04d}' for t in range(1000, 1010)]

def find_in1_df():
    """Select the rows of ``df`` whose ``tag`` is one of ``match_tags``.

    Uses ``Series.isin`` for element-wise membership. The ``in`` operator
    cannot be used here: it needs a single truth value, and asking a Series
    for one raises ``ValueError``.
    """
    return df[df['tag'].isin(match_tags)]

def find_in1_lc():
    """Scan ``cookies`` keeping those whose ``tag`` appears in ``match_tags``."""
    return [
        cookie
        for cookie in cookies
        if cookie['tag'] in match_tags
    ]

def find_in1_lb():
    """Query ``lb`` for objects whose ``tag`` is one of ``match_tags``.

    The tags are rendered as a parenthesised, single-quoted list, e.g.
    ``tag in ('1000', '1001')``. Interpolating the Python list directly
    would put square brackets in the query text instead.

    NOTE(review): assumes ``lb.find`` accepts a SQL-style expression, as the
    other ``lb.find`` calls in this file suggest -- confirm against LiteBox.
    The tags are not escaped, so they must not contain single quotes.
    """
    quoted_tags = ', '.join(f"'{tag}'" for tag in match_tags)
    return lb.find(f'tag in ({quoted_tags})')

def find_in1_dex():
    """Query ``dex`` for objects whose ``tag`` is one of ``match_tags``."""
    query = {'tag': {'in': match_tags}}
    return dex.find(query)

def find_in1_fdex():
    """Query ``fdex`` for objects whose ``tag`` is one of ``match_tags``."""
    tag_condition = {'in': match_tags}
    return fdex.find({'tag': tag_condition})

# All implementations of the one-attribute 'in' query.
IN1 = [
    find_in1_df,
    find_in1_lc,
    find_in1_lb,
    find_in1_dex,
    find_in1_fdex,
]