In [52]:
import random
import time
from dataclasses import dataclass
from rangeindex import RangeIndex
from matplotlib import pyplot as plt
%matplotlib inline

az = 'qwertyuiopasdfghjklzxcvbnm'
ten = list(range(10))


@dataclass
class Thing:
    x: int
    y: float
    desc: str


def make_thing():
    s = []
    for i in range(5):
        s.append(random.choice(az))
    return Thing(x=random.choice(ten), y=random.random(), desc=''.join(s))


def compare_speeds(n=10**3, portion_to_find=0.1):
    # amount_to_find
    things = [make_thing() for _ in range(n)]
    ri = RangeIndex({'x': int, 'y': float})
    t0 = time.time()
    ri.add_many(things)
    t1 = time.time()
    found_float = ri.find([('y', '<', portion_to_find), ('x', '<=', 10)])
    t2 = time.time()
    #found_str = ri.find([('desc', '<', 'zzzzz')])
    t3 = time.time()
    #linear_find = tuple(t for t in things if t.y < portion_to_find and t.x <= 10)
    linear_find = tuple(filter(lambda t: t.y < portion_to_find and t.x <= 10, things))
    t4 = time.time()
    
    results = {
        'filter': t4-t3,
        'ri_find': t2-t1
    }

    """
    print('t_add {} elements:'.format(len(things)), t1-t0)
    print('t_find {} elements:'.format(len(found_float)), t2-t1)
    #print('t_find {} elements:'.format(len(found_str)), t3-t2)
    print('linear find {} elements:'.format(len(linear_find)), t4-t3)

    print((t4-t3)/(t2-t1))
    """
    return results

In [46]:
def insert_time(n=10**6):
    things = [make_thing() for _ in range(n)]
    ri = RangeIndex({'x': int, 'y': float})
    rj = RangeIndex({'x': int, 'y': float})
    t0 = time.time()
    for t in things:
        ri.add(t)
    t1 = time.time()
    ri.add_many(things)
    t2 = time.time()
    
    t_each = t1-t0
    t_many = t2-t1
    print(t_each, t_many)
    
for e in range(4,7):
    insert_time(n=10**e)

0.09688425064086914 0.02398061752319336
0.9980337619781494 0.5340995788574219
10.601683139801025 4.041484832763672


In [51]:
4.041484832763672/10**6

4.0414848327636716e-06

In [53]:
find_times = {}
filter_times = {}
n_runs = 5
for reps in range(n_runs):
    for n_exp in range(6,7):
        for p_exp in range(-6, 1):
            n = 10**n_exp
            p = 10**p_exp
            res = compare_speeds(n=n, portion_to_find=p)
            n_objs = int(p*n)
            find_times[n_objs] = find_times.get(n_objs, 0) + res['ri_find'] / n_runs
            filter_times[n_objs] = filter_times.get(n_objs, 0) + res['filter'] / n_runs

In [54]:
def to_str(t: float):
    if t < 10**-6:
        return "{}μs".format(round(t*10**6,3))
    elif t < 10**-3:
        return "{}ms".format(round(t*10**3, 3))
    else:
        return "{}s".format(round(t, 2))

for key in sorted(find_times.keys()):
    ft = find_times[key]
    print(key, to_str(find_times[key]), to_str(filter_times[key]), round(filter_times[key]/find_times[key],3))

1 0.106ms 0.08s 752.424
10 0.106ms 0.08s 709.286
100 0.268ms 0.08s 282.092
1000 0.0s 0.07s 42.132
10000 0.02s 0.07s 4.926
100000 0.15s 0.08s 0.522
1000000 1.43s 0.12s 0.083
