In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import time
import matplotlib.colors as col
import os

In [2]:
# Root directory the benchmark result files are read from.
basedir = "../result/"
# Root directory the generated plots and statistics files are written to.
outdir = "target/"

def ensure_exists(dir):
    """Create directory `dir` (including missing parents) if it is not there yet.

    Uses ``exist_ok=True`` instead of a separate existence check, so there is
    no window between "check" and "create" in which another process could
    create the directory and make the call fail.

    Raises:
        FileExistsError: if `dir` exists but is not a directory.
    """
    os.makedirs(dir, exist_ok=True)

# Dataset names that are treated as having no usable R*Tree result: the
# analysis code prints "rstanal: dead" for them instead of looking up R*Tree
# statistics, and the size checks do not require an R*Tree entry for them.
too_big_for_rstar = [
    "synthetic_180x90x256_BigElement",
    "synthetic_180x90x256_VeryBigElement",
    "synthetic_180x90x64_VeryVeryBigElement",
    "synthetic_180x90x256_VeryVeryBigElement",
]

# Element-count boundaries (ascending) used to group datasets by size.
sizes = [16_200, 44_692, 64_800, 140_974, 259_200, 1_036_800, 3_808_651, 4_147_200]
# The same boundaries, padded with 0 below and 5_000_000 above.
itersz = [0] + sizes + [5_000_000]

# Consecutive (lower, upper) boundary pairs taken from itersz.
ranges = list(zip(itersz, itersz[1:]))

def rangeindex(number):
    """Return the index of the range in `ranges` that contains `number`.

    A range (lower, upper) contains `number` when lower < number <= upper.
    Returns None when no range matches (number <= 0 or above the last bound).
    """
    for position, (lower, upper) in enumerate(ranges):
        if lower < number <= upper:
            return position
    return None

# Envelope sizes for which do_analytics pre-creates result buckets.
env_sizes = [16, 64, 256, 1024, 4096]
# NOTE(review): not referenced anywhere in the visible notebook — confirm it is still needed.
env_count = 16

# Maps a substring of a dataset name to that dataset's element count.
# getcount() returns the value of the FIRST key found inside the name, so the
# insertion order matters: "180x90x16" must come before "180x90x1", otherwise
# the shorter key would match the longer dataset names as well.
nelements = {
    "matthe": 3_808_651,
    "open": 140_974,
    "simple": 44_692,
    "180x90x256": 4_147_200,
    "180x90x64": 1_036_800,
    "180x90x16": 259_200,
    "180x90x4": 64_800,
    "180x90x1": 16_200,
}

# Maps an element type name (as it appears in dataset names) to the element
# size in bytes. Lookups by substring take the first matching key, so longer
# names that contain shorter ones ("VeryVeryBigElement" > "VeryBigElement" >
# "BigElement" > "Element") have to be listed first.
types = {
    "VeryVeryBigElement": 1024,
    "VeryBigElement": 512,
    "BiggerElement": 24,
    "BigElement": 256,
    "Element": 12,
}

def typename_by_mult(mult):
    """Return the first type name in `types` whose size equals `mult`, else None."""
    return next((name for name, size in types.items() if size == mult), None)

# Face colours of the violin plots, looked up in matplotlib's named-colour table.
hprcol = col.get_named_colors_mapping().get("green")
rstcol = col.get_named_colors_mapping().get("orange")

# Legend labels for the two index structures being compared.
hprlab = "HPRTree"
rstlab = "R*Tree"

# File extension (including the dot) appended to every saved figure name.
imgext = ".svg"

In [3]:
def getcount(datasetname: str):
    """Return the number of elements of the dataset called `datasetname`.

    The name is first matched against the keys of `nelements` (first key that
    occurs as a substring wins). If none matches, the count is parsed from the
    name itself for "clustered" datasets (product of the first two
    "_"-separated numbers after "clustered_") and "uniform" datasets (the
    number after "uniform_").

    Raises:
        AssertionError: if the name matches none of the known patterns.
    """
    count = next((total for marker, total in nelements.items() if marker in datasetname), None)
    if count is None:
        last_underscore = datasetname.rfind("_")
        if "clustered" in datasetname:
            begin = datasetname.index("clustered") + len("clustered_")
            # Presumably the 6 skips a fixed-width field in front of the last
            # underscore — TODO confirm against the real file names.
            factors = datasetname[begin:last_underscore - 6].split("_")
            count = int(factors[0]) * int(factors[1])
        elif "uniform" in datasetname:
            begin = datasetname.index("uniform") + len("uniform_")
            # Same idea with a 2-character offset — TODO confirm.
            count = int(datasetname[begin:last_underscore - 2])
    assert count is not None, "could not recognize dataset"
    return count

def gettypemult(datasetname: str):
    """Return the element size belonging to the type named in `datasetname`.

    The first key of `types` that occurs as a substring of the name wins.

    Raises:
        AssertionError: if no key of `types` occurs in the name.
    """
    sizemult = next((size for type_name, size in types.items() if type_name in datasetname), None)
    assert sizemult is not None, "could not recognize type"
    return sizemult

def theoreticalmin(datasetname: str):
    """Return element count times element size for the dataset `datasetname`."""
    return getcount(datasetname) * gettypemult(datasetname)

In [4]:
def foreachfileindir(dir, do):
    """Call `do(entry)` for every regular file directly inside directory `dir`.

    `entry` is the `os.DirEntry` produced by `os.scandir`; sub-directories are
    skipped. The scandir iterator is used as a context manager so the
    underlying directory handle is released even when `do` raises.
    """
    with os.scandir(dir) as entries:
        for entry in entries:
            if entry.is_file():
                do(entry)

def readlinestointarr(filename, mult = None):
    """Read one number per line from `filename` and return them as a list of ints.

    Args:
        filename: path (or path-like object) of the text file to read.
        mult: optional factor. When given, each line is parsed as a float,
            multiplied by `mult` and truncated to int; otherwise each line is
            parsed directly with `int`.

    Blank or whitespace-only lines (e.g. a trailing empty line) are skipped
    instead of aborting the whole read with a ValueError.
    """
    arr = []
    with open(filename) as f:
        for l in f:
            l = l.strip()
            if not l:
                continue
            if mult is not None:
                arr.append(int(float(l)*mult))
            else:
                arr.append(int(l))
    return arr

# Names of the first four entries of the tuple returned by analyse(); the
# tuple additionally carries the number of samples as a fifth entry.
analysis_def = ["avg", "std", "max", "min"]
def analyse(data):
    """Return (mean, standard deviation, maximum, minimum, sample count) of `data`."""
    return (np.mean(data), np.std(data), np.max(data), np.min(data), len(data))


In [5]:
def getbuilddirinfo(dir, mult = None):
    """Load every result file in `dir` and compute summary statistics for each.

    Args:
        dir: directory whose regular files are read.
        mult: optional scale factor handed on to readlinestointarr.

    Returns:
        (basedata, analysisdata): two dicts keyed by the file name with the
        substrings "_hprtree" and "_rstar" removed. `basedata` holds the list
        of values read from the file, `analysisdata` the analyse() tuple of
        that list.
    """
    basedata = {}

    def collect(entry):
        key = str(entry.name)
        for tree_suffix in ("_hprtree", "_rstar"):
            key = key.replace(tree_suffix, "")
        basedata[key] = readlinestointarr(entry, mult)

    foreachfileindir(dir, collect)
    analysisdata = {key: analyse(values) for key, values in basedata.items()}
    return (basedata, analysisdata)

In [10]:
# When True, plotdirinfos writes each figure to disk; otherwise it shows it.
save_plotdirinfos = True

def plotdirinfos(datahpr, datarst, title, fname, ylabel = "", subdir = ""):
    """Draw HPRTree and R*Tree samples as violin plots in one figure.

    Args:
        datahpr: datasets handed to `plt.violinplot` for the HPRTree.
        datarst: datasets handed to `plt.violinplot` for the R*Tree.
        title: figure title.
        fname: file name (without directory and extension) used when saving.
        ylabel: y-axis label.
        subdir: sub-directory below `outdir` the figure is saved into.

    The figure is saved as f"{outdir}{subdir}{fname}{imgext}" when
    `save_plotdirinfos` is true and shown otherwise; it is closed either way.
    """
    def draw_violins(dataset, facecolor, label):
        # Draw one opaque violin group and return its (legend patch, label) pair.
        bodies = plt.violinplot(dataset, showmeans=False, showmedians=False, showextrema=False)["bodies"]
        for body in bodies:
            body.set_facecolor(facecolor)
            body.set_alpha(1)
        swatch = mpatches.Patch(color=bodies[0].get_facecolor().flatten())
        return (swatch, label)

    plt.figure()
    legend_entries = [
        draw_violins(datahpr, hprcol, hprlab),
        draw_violins(datarst, rstcol, rstlab),
    ]

    plt.xticks([])
    handles, names = zip(*legend_entries)
    plt.legend(handles, names, loc=2)
    plt.ylabel(ylabel)
    plt.title(f"{title}")

    if save_plotdirinfos:
        plt.savefig(f"{outdir}{subdir}{fname}{imgext}")
    else:
        plt.show()
    plt.close()

In [6]:
# When True, print_or_write prints immediately; when False it appends to the
# shared buffer that printcommit later writes to a file.
do_print = False

# Shared text buffer. Initialised here so print_or_write also works when it is
# called before the first printinit().
writetext = ""

def printinit():
    """Reset the shared text buffer to the empty string."""
    global writetext
    writetext = ""

def printcommit(filename):
    """Write the buffered text to `filename`, replacing any previous content.

    The file is opened in a `with` block so it is closed even if the write fails.
    """
    with open(filename, "w+") as writefile:
        writefile.write(writetext)

def print_or_write(text):
    """Print `text`, or append it plus a newline to the buffer, depending on `do_print`."""
    if do_print:
        print(text)
    else:
        global writetext
        writetext += text
        writetext += "\n"

In [10]:
def do_analytics(func_hprdict, func_rstdict, func_hpranal, func_rstanal, subdir, statname, newnamefunc, envelopes = False):
    """Bucket per-envelope benchmark samples and write an HPRTree vs. R*Tree report.

    Args:
        func_hprdict: HPRTree samples keyed "<name>.<env_size>" (the key is
            split at its last ".").
        func_rstdict: R*Tree samples with the same keys; a missing key is
            replaced by [0].
        func_hpranal: analyse() tuples for the HPRTree samples, same keys.
        func_rstanal: analyse() tuples for the R*Tree samples, same keys.
        subdir: sub-directory of `outdir` the report is written into.
        statname: report file name without the ".txt" extension.
        newnamefunc: maps the "<name>" part of a key to the dataset name that
            getcount/gettypemult understand.
        envelopes: accepted but not used by this function.

    Returns:
        (func_hprarr, func_rstarr): dicts keyed
        "<env_size>_<element size>_<range index>" holding the lists of sample
        lists that fell into each bucket.

    The report lists, per dataset, both analyse() tuples and the ratio
    R*Tree mean / HPRTree mean ("worse" when the ratio is below 1), followed
    by the smallest and largest ratio and the worse/better counts. Datasets
    in `too_big_for_rstar` are counted as better without a ratio.
    """
    printinit()

    # One empty bucket for every envelope size / element size / size range.
    bucket_keys = [
        f"{sz}_{mult}_{ridx}"
        for sz in env_sizes
        for mult in types.values()
        for ridx in range(len(ranges))
    ]
    func_hprarr = {bucket: [] for bucket in bucket_keys}
    func_rstarr = {bucket: [] for bucket in bucket_keys}

    # 999e999 overflows to float infinity, so any real ratio replaces it.
    func_mindiff, func_mindiff_name = 999e999, None
    func_maxdiff, func_maxdiff_name = -999e999, None
    func_worsecount = 0
    func_bettercount = 0

    for k, v in func_hprdict.items():
        name, env_size = k.rsplit(".", 1)
        newname = newnamefunc(name)
        count = getcount(newname)
        typemult = gettypemult(newname)
        idxstr = f"{env_size}_{typemult}_{rangeindex(count)}"
        entry_label = f"{newname} ({idxstr}) {{{env_size}}}"

        func_hprarr[idxstr].append(v)
        func_rstarr[idxstr].append(func_rstdict.get(k, [0]))

        print_or_write(entry_label)
        hpranal = func_hpranal[k]
        print_or_write(f"hpranal: {hpranal}")

        if newname in too_big_for_rstar:
            print_or_write("rstanal: dead")
            print_or_write("better (inf)\n")
            func_bettercount += 1
            continue

        rstanal = func_rstanal[k]
        print_or_write(f"rstanal: {rstanal}")
        mult = rstanal[0]/hpranal[0]
        if mult < 1:
            print_or_write(f"worse ({mult})\n")
            func_worsecount += 1
        else:
            print_or_write(f"better ({mult})\n")
            func_bettercount += 1
        if mult > func_maxdiff:
            func_maxdiff, func_maxdiff_name = mult, entry_label
        if mult < func_mindiff:
            func_mindiff, func_mindiff_name = mult, entry_label

    print_or_write(f"mindiff: {func_mindiff} [{func_mindiff_name}]\nmaxdiff: {func_maxdiff}  [{func_maxdiff_name}]")
    print_or_write(f"hpr is worse (in average) in {func_worsecount} datasets")
    print_or_write(f"and better (in average) in {func_bettercount} datasets")
    printcommit(f"{outdir}{subdir}{statname}.txt")
    return (func_hprarr, func_rstarr)

In [172]:
# Build benchmark: results are read from <basedir>build/ and the plots and
# statistics are written to <outdir>build/ (created here if missing).
buildsubdir = "build/"
builddir = basedir + buildsubdir
ensure_exists(f"{outdir}{buildsubdir}")

# One result directory per index implementation.
buildhprdir = builddir + "hprtree/"
buildrstdir = builddir + "rstar/"

In [173]:
# Load the raw build samples and their statistics for both trees. Every value
# is scaled by 1e-6 and truncated to int; the plot below labels the result as
# milliseconds, so the files presumably hold nanoseconds — TODO confirm.
(build_hprdict, build_hpranal) = getbuilddirinfo(buildhprdir, 0.000001)
(build_rstdict, build_rstanal) = getbuilddirinfo(buildrstdir, 0.000001)

In [174]:
# Build-time comparison: group the datasets by element type and element-count
# range, log per-dataset statistics, and draw one violin plot per group.

# Sample lists per group, keyed "<type name>_<lower bound>_<upper bound>".
build_hprarr = {}
build_rstarr = {}
# Running extremes of the ratio "R*Tree mean / HPRTree mean". 999e999
# overflows to float infinity, so the first real ratio replaces the seed.
build_mindiff = 999e999
build_mindiff_name = None
build_maxdiff = -999e999
build_maxdiff_name = None
build_worsecount = 0
build_bettercount = 0

printinit()
for (typen, typesz) in types.items():
    # NOTE(review): the ranges come from zip(sizes, sizes[1:]), whose first
    # lower bound is 16_200 and is compared with a strict ">". Datasets with
    # exactly 16_200 elements therefore fall into no group here, unlike the
    # module-level `ranges`, which start at 0 — confirm this is intended.
    for previous, current in zip(sizes, sizes[1:]):
        key = f"{typen}_{previous}_{current}"
        build_hprarr[key] = []
        build_rstarr[key] = []
        for k, v in build_hprdict.items():
            newname = k.replace("bench_build_", "")
            count = getcount(newname)
            if count > previous and count <= current and gettypemult(newname) == typesz:
                build_hprarr[key].append(v)
                # A dataset without R*Tree samples is plotted as the single value 0.
                build_rstarr[key].append(build_rstdict.get(k, [0]))
                print_or_write(f"{newname} ({key})")
                hpranal = build_hpranal[k]
                print_or_write(f"hpranal: {hpranal}")
                if too_big_for_rstar.count(newname) == 1:
                    # NOTE(review): logged as "better" but build_bettercount is
                    # not incremented, whereas do_analytics does count this
                    # case — confirm which behaviour is wanted.
                    print_or_write("rstanal: dead")
                    print_or_write("better (inf)\n")
                else:
                    rstanal = build_rstanal[k]
                    print_or_write(f"rstanal: {rstanal}")
                    # Index 0 of an analyse() tuple is the mean; a ratio below 1
                    # means the HPRTree mean is the larger of the two.
                    mult = rstanal[0]/hpranal[0]
                    if mult < 1:
                        print_or_write(f"worse ({mult})\n")
                        build_worsecount += 1
                    else:
                        print_or_write(f"better ({mult})\n")
                        build_bettercount += 1
                    if mult > build_maxdiff:
                        build_maxdiff = mult
                        build_maxdiff_name = f"{newname} ({key})"
                    if mult < build_mindiff:
                        build_mindiff = mult
                        build_mindiff_name = f"{newname} ({key})"
        # NOTE(review): called even when no dataset matched this group;
        # plotdirinfos indexes the first violin body, so an empty group
        # presumably fails there — the querypre plotting cell guards this case.
        plotdirinfos(build_hprarr[key], build_rstarr[key], f"Build times with elements of {typesz} bytes", key, "Build time in ms", buildsubdir)
print_or_write(f"mindiff: {build_mindiff} [{build_mindiff_name}]\nmaxdiff: {build_maxdiff}  [{build_maxdiff_name}]")
print_or_write(f"hpr is worse (in average) in {build_worsecount} datasets")
print_or_write(f"and better (in average) in {build_bettercount} datasets")
printcommit(f"{outdir}{buildsubdir}buildstats.txt")

In [7]:
# Deletion benchmark: the results live next to the build results, in the
# d_hprtree/ and d_rstar/ sub-directories of <basedir>build/. Output goes to
# <outdir>delete/ (created here if missing).
delbuilddir = basedir + "build/"
delsubdir = "delete/"
ensure_exists(f"{outdir}{delsubdir}")

delhprdir = delbuilddir + "d_hprtree/"
delrstdir = delbuilddir + "d_rstar/"

In [8]:
# Load the raw deletion samples and their statistics for both trees. Values
# are scaled by 1e-3 and truncated to int; the plot below labels the result as
# microseconds, so the files presumably hold nanoseconds — TODO confirm.
(delete_hprdict, delete_hpranal) = getbuilddirinfo(delhprdir, 0.001)
(delete_rstdict, delete_rstanal) = getbuilddirinfo(delrstdir, 0.001)

In [11]:
# Deletion-time comparison: same procedure as for the build times, plus a
# per-size-range aggregation of the smallest and largest ratio.

# Sample lists per group, keyed "<type name>_<lower bound>_<upper bound>".
del_hprarr = {}
del_rstarr = {}
# Running extremes of the ratio "R*Tree mean / HPRTree mean". 999e999
# overflows to float infinity, so the first real ratio replaces the seed.
del_mindiff = 999e999
del_mindiff_name = None
del_maxdiff = -999e999
del_maxdiff_name = None
del_worsecount = 0
del_bettercount = 0

# rangeindex(count) -> (smallest ratio, largest ratio) seen in that size range.
del_aggregation = {}

printinit()
for (typen, typesz) in types.items():
    # NOTE(review): zip(sizes, sizes[1:]) starts at 16_200 with a strict ">",
    # so datasets with exactly 16_200 elements match no group here, while
    # rangeindex() below uses `ranges`, which start at 0 — confirm intended.
    for previous, current in zip(sizes, sizes[1:]):
        key = f"{typen}_{previous}_{current}"
        del_hprarr[key] = []
        del_rstarr[key] = []
        for k, v in delete_hprdict.items():
            # The deletion result files carry the "bench_build_" prefix too.
            newname = k.replace("bench_build_", "")
            count = getcount(newname)
            if count > previous and count <= current and gettypemult(newname) == typesz:
                del_hprarr[key].append(v)
                # A dataset without R*Tree samples is plotted as the single value 0.
                del_rstarr[key].append(delete_rstdict.get(k, [0]))
                print_or_write(f"{newname} ({key})")
                hpranal = delete_hpranal[k]
                print_or_write(f"hpranal: {hpranal}")
                if too_big_for_rstar.count(newname) == 1:
                    # NOTE(review): logged as "better" but del_bettercount is
                    # not incremented, whereas do_analytics does count this
                    # case — confirm which behaviour is wanted.
                    print_or_write("rstanal: dead")
                    print_or_write("better (inf)\n")
                else:
                    rstanal = delete_rstanal[k]
                    print_or_write(f"rstanal: {rstanal}")
                    # Index 0 of an analyse() tuple is the mean; a ratio below 1
                    # means the HPRTree mean is the larger of the two.
                    mult = rstanal[0]/hpranal[0]
                    if mult < 1:
                        print_or_write(f"worse ({mult})\n")
                        del_worsecount += 1
                    else:
                        print_or_write(f"better ({mult})\n")
                        del_bettercount += 1
                    if mult > del_maxdiff:
                        del_maxdiff = mult
                        del_maxdiff_name = f"{newname} ({key})"
                    if mult < del_mindiff:
                        del_mindiff = mult
                        del_mindiff_name = f"{newname} ({key})"
                    # Track the extreme ratios separately for each size range.
                    rngidx = rangeindex(count)
                    (temp_min, temp_max) = del_aggregation.get(rngidx, (999e999, -999e999))
                    if mult < temp_min:
                        temp_min = mult
                    if mult > temp_max:
                        temp_max = mult
                    del_aggregation[rngidx] = (temp_min, temp_max)
        # NOTE(review): called even when no dataset matched this group; the
        # querypre plotting cell skips empty groups instead.
        plotdirinfos(del_hprarr[key], del_rstarr[key], f"Deletion times with elements of {typesz} bytes", key, "Deletion time in us", delsubdir)
print_or_write(f"mindiff: {del_mindiff} [{del_mindiff_name}]\nmaxdiff: {del_maxdiff}  [{del_maxdiff_name}]")
print_or_write(f"hpr is worse (in average) in {del_worsecount} datasets")
print_or_write(f"and better (in average) in {del_bettercount} datasets\n")
print_or_write(f"aggregation per range: {del_aggregation}")
printcommit(f"{outdir}{delsubdir}deletestats.txt")



In [178]:
# Query-all benchmark: results are read from <basedir>queryall/ and the plots
# and statistics are written to <outdir>queryall/ (created here if missing).
qallsubdir = "queryall/"
qallbuilddir = basedir + qallsubdir
ensure_exists(f"{outdir}{qallsubdir}")

# qallbuilddir already ends with "/", so the sub-directory names are appended
# without a leading slash (the previous "/hprtree" produced "queryall//hprtree").
# This matches how the build and querypre cells spell their directories.
qallhprdir = qallbuilddir + "hprtree/"
qallrstdir = qallbuilddir + "rstar/"

In [179]:
# Load the raw query-all samples and their statistics for both trees. Values
# are scaled by 1e-3 and truncated to int; the plot below labels the result as
# microseconds, so the files presumably hold nanoseconds — TODO confirm.
(qall_hprdict, qall_hpranal) = getbuilddirinfo(qallhprdir, 0.001)
(qall_rstdict, qall_rstanal) = getbuilddirinfo(qallrstdir, 0.001)

In [180]:
# Query-all comparison: group the datasets by element type and element-count
# range, log per-dataset statistics, and draw one violin plot per group.

# Sample lists per group, keyed "<type name>_<lower bound>_<upper bound>".
qall_hprarr = {}
qall_rstarr = {}
# Running extremes of the ratio "R*Tree mean / HPRTree mean". 999e999
# overflows to float infinity, so the first real ratio replaces the seed.
qall_mindiff = 999e999
qall_mindiff_name = None
qall_maxdiff = -999e999
qall_maxdiff_name = None
qall_worsecount = 0
qall_bettercount = 0

printinit()
for (typen, typesz) in types.items():
    # NOTE(review): zip(sizes, sizes[1:]) starts at 16_200 with a strict ">",
    # so datasets with exactly 16_200 elements match no group here, unlike the
    # module-level `ranges`, which start at 0 — confirm this is intended.
    for previous, current in zip(sizes, sizes[1:]):
        key = f"{typen}_{previous}_{current}"
        qall_hprarr[key] = []
        qall_rstarr[key] = []
        for k, v in qall_hprdict.items():
            newname = k.replace("bench_queryall_", "")
            count = getcount(newname)
            if count > previous and count <= current and gettypemult(newname) == typesz:
                qall_hprarr[key].append(v)
                # A dataset without R*Tree samples is plotted as the single value 0.
                qall_rstarr[key].append(qall_rstdict.get(k, [0]))
                print_or_write(f"{newname} ({key})")
                hpranal = qall_hpranal[k]
                print_or_write(f"hpranal: {hpranal}")
                if too_big_for_rstar.count(newname) == 1:
                    # NOTE(review): logged as "better" but qall_bettercount is
                    # not incremented, whereas do_analytics does count this
                    # case — confirm which behaviour is wanted.
                    print_or_write("rstanal: dead")
                    print_or_write("better (inf)\n")
                else:
                    rstanal = qall_rstanal[k]
                    print_or_write(f"rstanal: {rstanal}")
                    # Index 0 of an analyse() tuple is the mean; a ratio below 1
                    # means the HPRTree mean is the larger of the two.
                    mult = rstanal[0]/hpranal[0]
                    if mult < 1:
                        print_or_write(f"worse ({mult})\n")
                        qall_worsecount += 1
                    else:
                        print_or_write(f"better ({mult})\n")
                        qall_bettercount += 1
                    if mult > qall_maxdiff:
                        qall_maxdiff = mult
                        qall_maxdiff_name = f"{newname} ({key})"
                    if mult < qall_mindiff:
                        qall_mindiff = mult
                        qall_mindiff_name = f"{newname} ({key})"
        # NOTE(review): called even when no dataset matched this group; the
        # querypre plotting cell skips empty groups instead.
        plotdirinfos(qall_hprarr[key], qall_rstarr[key], f"Query times for all elements of {typesz} bytes", key, "Query time in us", qallsubdir)
print_or_write(f"mindiff: {qall_mindiff} [{qall_mindiff_name}]\nmaxdiff: {qall_maxdiff}  [{qall_maxdiff_name}]")
print_or_write(f"hpr is worse (in average) in {qall_worsecount} datasets")
print_or_write(f"and better (in average) in {qall_bettercount} datasets")
printcommit(f"{outdir}{qallsubdir}qallstats.txt")

In [None]:
# Query-by-envelope ("querypre") benchmark: results are read from
# <basedir>querypre/ and output goes to <outdir>querypre/ (created if missing).
qpresubdir = "querypre/"
qprebuilddir = basedir + qpresubdir
ensure_exists(f"{outdir}{qpresubdir}")

# One result directory per index implementation.
qprehprdir = qprebuilddir + "hprtree/"
qprerstdir = qprebuilddir + "rstar/"

In [181]:
# Load the raw querypre samples and their statistics for both trees. Values
# are scaled by 1e-3 and truncated to int; the plots label the result as
# microseconds, so the files presumably hold nanoseconds — TODO confirm.
(qpre_hprdict, qpre_hpranal) = getbuilddirinfo(qprehprdir, 0.001)
(qpre_rstdict, qpre_rstanal) = getbuilddirinfo(qprerstdir, 0.001)

In [182]:
# Bucket the querypre samples and write <outdir>querypre/qprestats.txt. The
# lambda rewrites "ordered_" in a result name to "synthetic_180x90x" before
# the name is handed to getcount/gettypemult.
(qpre_hprarr, qpre_rstarr) = do_analytics(qpre_hprdict, qpre_rstdict, qpre_hpranal, qpre_rstanal, qpresubdir, "qprestats", lambda e:e.replace("ordered_", "synthetic_180x90x"))

In [183]:
# One violin plot per (envelope size, element size, size range) bucket produced
# by do_analytics; the bucket key is rebuilt here in the same
# "<envelope size>_<element size>_<range index>" form.
for sz in env_sizes:
    szstr = str(sz)
    for mult in types.values():
        multstr = str(mult)
        for ridx in range(len(ranges)):
            ridxstr = str(ridx)
            idxstr = szstr + '_' + multstr + '_' + ridxstr
            # (c, n) are the lower and upper element-count bound of this range.
            (c, n) = ranges[ridx]
            # Buckets that received no samples are skipped.
            if len(qpre_hprarr[idxstr]) != 0 and len(qpre_rstarr[idxstr]) != 0:
                plotdirinfos(qpre_hprarr[idxstr], qpre_rstarr[idxstr], f"Query time for envelopes containing {szstr} elements\nwith a size of {mult} bytes and an index size\nbetween {c} and {n} elements", f"{typename_by_mult(mult)}_{c}_{n}-{szstr}", "Query time in us", qpresubdir)

In [18]:
# Memory-size comparison: read the per-dataset index sizes of both trees from
# <basedir>/szfiles and sanity-check them against each other and against the
# theoretical minimum (element count * element size).
sizesubdir = "size/"
# NOTE(review): basedir already ends with "/", so this yields "../result//szfiles".
szfiledir = basedir + "/szfiles"
ensure_exists(f"{outdir}{sizesubdir}")

# Each line has the form "<key>: <integer>". The first 20 (hprtree) or 18
# (rstar) characters of the key are dropped — presumably a fixed benchmark
# prefix, so that both dicts end up keyed by dataset name; TODO confirm.
sizes_hprtree = {}
with open(szfiledir + "/hprtree") as f:
    for l in f:
        (k, v) = l.split(": ")
        sizes_hprtree[k[20:]] = int(v)
sizes_rstar = {}
with open(szfiledir + "/rstar") as f:
    for l in f:
        (k, v) = l.split(": ")
        sizes_rstar[k[18:]] = int(v)

# Every HPRTree dataset that is not listed in too_big_for_rstar must also have
# an R*Tree size, and the HPRTree size must be strictly smaller.
for k, v in sizes_hprtree.items():
    ret = sizes_rstar.get(k)
    if too_big_for_rstar.count(k) != 1:
        assert ret != None, f"rstar doesn't have {k}"
        assert v < ret
# Conversely, every R*Tree dataset must have a strictly smaller HPRTree size.
for k, v in sizes_rstar.items():
    ret = sizes_hprtree.get(k)
    assert ret != None, f"hprtree doesn't have {k}"
    assert v > ret

# One (HPRTree size, theoretical minimum, R*Tree size) tuple per dataset;
# the R*Tree size is -1 when there is no R*Tree entry for the dataset.
data_size = []
for k, v in sizes_hprtree.items():
    data_size.append((v, theoreticalmin(k), sizes_rstar.get(k, -1)))

# Wherever an R*Tree size exists: theoretical minimum < HPRTree < R*Tree.
for (hpr_sz, min_sz, rst_sz) in data_size:
    if rst_sz != -1:
        assert min_sz < hpr_sz and hpr_sz < rst_sz



In [19]:
# Order the datasets by HPRTree size (sorts data_size in place) and split the
# tuples into three parallel sequences.
data_size.sort(key=lambda e:e[0])
data_hprtree, data_min, data_rstar = zip(*data_size)

# Sizes as a percentage of the theoretical minimum. max(..., 0) turns the
# negative ratio produced by the -1 "no R*Tree size" placeholder into 0.
data_hpr_diff = [max(a / b, 0)*100 for a, b in zip(data_hprtree, data_min)]
data_rst_diff = [max(a / b, 0)*100 for a, b in zip(data_rstar, data_min)]

In [20]:
# When True, size_multi_plot saves each figure to disk instead of showing it.
save_comb_size = True

def size_multi_plot(start, end, title):
    """Plot several index slices of the size percentages side by side.

    Args:
        start: slice start indices into data_hpr_diff / data_rst_diff.
        end: matching (exclusive) slice end indices.
        title: figure title.

    The slices are laid out contiguously on the x axis, beginning at start[0].
    A horizontal line at 100 marks the theoretical minimum. When
    `save_comb_size` is true the figure is saved as
    f"{outdir}{sizesubdir}sizegrp_{top}{imgext}", where `top` is the upper
    y-axis limit, and then closed; otherwise it is shown.
    """
    plt.figure()

    cursor = start[0]
    for lo, hi in zip(start, end):
        width = hi - lo
        xs = list(range(cursor, cursor + width))
        cursor += width
        plt.scatter(xs, data_hpr_diff[lo:hi], marker="_", linewidths=0.5, color="green", label=hprlab)
        plt.scatter(xs, data_rst_diff[lo:hi], marker="_", linewidths=0.5, color="orange", label=rstlab)

    left, right = plt.xlim()
    plt.hlines([100], [left], [right], linewidth=0.5, alpha=0.5, colors=["purple"], label="Theoretical minimum")
    plt.ylabel("Size in memory in % of theoretical minimum")
    # Every slice adds its own pair of scatter handles; keep only the first
    # pair plus the last handle (the horizontal line) for the legend.
    handles, names = plt.gca().get_legend_handles_labels()
    plt.legend([*handles[:2], handles[-1]], [*names[:2], names[-1]], loc=6)

    plt.title(title)

    plt.xticks([])
    if not save_comb_size:
        plt.show()
        return
    _, top = plt.ylim()
    plt.savefig(f"{outdir}{sizesubdir}sizegrp_{top}{imgext}")
    plt.close()


# Grouped size plots. Each call passes parallel lists of slice start and end
# indices into the size-sorted data; the indices are hard-coded for the
# current result set. The trailing comments presumably give the percentage
# range covered by each group — TODO confirm.
size_multi_plot([0, 38, 57, 114, 172, 285, 456, 475, 532], [19, 57, 76, 133, 190, 304, 475, 494, 551], "") # 150 - 450

size_multi_plot([19, 76, 152, 210, 342, 494], [38, 114, 171, 228, 361, 513], "") # 140 - 230

size_multi_plot([133, 190, 228, 247, 266, 304, 323, 361, 380, 399, 419, 437, 514, 551, 571, 589, 608, 627, 647, 666, 684, 704, 722, 742], [152, 209, 247, 266, 285, 323, 342, 380, 399, 418, 437, 456, 532, 570, 589, 608, 627, 646, 665, 684, 703, 722, 741, 760], "") # 100 - 140

size_multi_plot([418, 513, 570, 209, 171], [419, 514, 571, 210, 172], "") # 1600 - 6500

size_multi_plot([646, 665, 703, 741], [647, 666, 704, 742], "") # 0 - 100



In [21]:
# Re-sort the datasets by theoretical minimum (sorts data_size in place) and
# rebuild the parallel sequences and percentages; this overwrites the
# size-sorted versions computed earlier.
data_size.sort(key=lambda e:e[1])
data_hprtree, data_min, data_rstar = zip(*data_size)

# Sizes as a percentage of the theoretical minimum; the -1 "no R*Tree size"
# placeholder ends up as 0.
data_hpr_diff = [max(a / b, 0)*100 for a, b in zip(data_hprtree, data_min)]
data_rst_diff = [max(a / b, 0)*100 for a, b in zip(data_rstar, data_min)]

# When True, the overview figures are saved to disk instead of being shown.
save_all_size = True


# Overview of all datasets: size in percent of the theoretical minimum.
plt.figure()
sz = len(data_min)
x = [*range(sz)]
plt.scatter(x, data_hpr_diff, marker="_", linewidths=0.5, color="green", label=hprlab)
plt.scatter(x, data_rst_diff, marker="_", linewidths=0.5, color="orange", label=rstlab)
# Draw the 100 % reference line across the current x-axis extent.
l, r = plt.xlim()
plt.hlines([100], [l], [r], linewidth=0.5, alpha=0.5, colors=["purple"], label="Theoretical minimum")
plt.ylabel("Size in memory in percent of theoretical minimum")
plt.legend()
plt.xticks([])
if save_all_size:
    plt.savefig(f"{outdir}{sizesubdir}size_all_p100{imgext}")
    plt.close()
else:
    plt.title(f"size_all_p100")
    plt.show()

In [22]:
# Overview of all datasets in absolute terms: divide every size by 1024
# (labelled as kibibytes on the y axis).
data_min_kib = [ sz / 1024 for sz in data_min]
data_hprtree_kib = [ sz / 1024 for sz in data_hprtree]
# NOTE(review): data_rstar still contains the -1 placeholder for datasets
# without an R*Tree size, so those are plotted just below zero — confirm OK.
data_rstar_kib = [ sz / 1024 for sz in data_rstar]

plt.figure(clear=True)
x = [*range(len(data_min_kib))]
plt.scatter(x, data_hprtree_kib, marker="_", linewidths=0.5, color="green", label="HPRTree")
plt.scatter(x, data_rstar_kib, marker="_", linewidths=0.5, color="orange", label="R*Tree")
# The theoretical minimum is drawn as a line over the same dataset order.
plt.plot(data_min_kib, linewidth=0.5, alpha=0.5, color="purple", label="Theoretical minimum")
plt.ylabel("Size in memory in kibibytes")
plt.legend()
plt.xticks([])
if save_all_size:
    plt.savefig(f"{outdir}{sizesubdir}size_all_bytes{imgext}")
    plt.close()
else:
    plt.title(f"size_all_bytes")
    plt.show()


# size_plot(0, 19)# size_plot(19, 38)# size_plot(38, 57)# size_plot(57, 76)# size_plot(76, 114)# size_plot(114, 133)# size_plot(133, 152)# size_plot(152, 171)# size_plot(171, 172)# size_plot(172, 190)# size_plot(190, 209)# size_plot(209, 210)# size_plot(210, 228)# size_plot(228, 247)# size_plot(247, 266)# size_plot(266, 285)# size_plot(285, 304)# size_plot(304, 323)# size_plot(323, 342)# size_plot(342, 361)# size_plot(361, 380)# size_plot(380, 399)# size_plot(399, 418)# size_plot(418, 419)# size_plot(419, 437)# size_plot(437, 456)# size_plot(456, 475)# size_plot(475, 494)# size_plot(494, 513)# size_plot(513, 514)# size_plot(514, 532)# size_plot(532, 551)# size_plot(551, 570)# size_plot(570, 571)# size_plot(571, 589)# size_plot(589, 608)# size_plot(608, 627)# size_plot(627, 646)# size_plot(646, 647)# size_plot(647, 665)# size_plot(665, 666)# size_plot(666, 684)# size_plot(684, 703)# size_plot(703, 704)# size_plot(704, 722)# size_plot(722, 741)# size_plot(741, 742)# size_plot(742, 760)

In [30]:
# Size statistics: smallest and largest percentage of the theoretical minimum
# per tree, plus the smallest and largest HPRTree/R*Tree size ratio.
# 999e999 overflows to float infinity, so any real value replaces the seeds.
best_hprtree = 999e999
best_rsttree = 999e999
worst_hprtree = -999e999
worst_rsttree = -999e999

for (d_hpr, d_rst) in zip(data_hpr_diff, data_rst_diff):
    if best_hprtree > d_hpr:
        best_hprtree = d_hpr
    if worst_hprtree < d_hpr:
        worst_hprtree = d_hpr
    # 0 marks a dataset without an R*Tree size (negative ratio clamped to 0).
    if d_rst != 0:
        if best_rsttree > d_rst:
            best_rsttree = d_rst
        if worst_rsttree < d_rst:
            worst_rsttree = d_rst

best_comp = 999e999
worst_comp = -999e999

for (d_hpr, d_rst) in zip(data_hprtree, data_rstar):
    # -1 marks a dataset without an R*Tree size.
    if d_rst != -1:
        # HPRTree size relative to R*Tree size; smaller is better for HPRTree.
        comp = d_hpr / d_rst
        if comp < best_comp:
            best_comp = comp
        if comp > worst_comp:
            worst_comp = comp

printinit()
print_or_write(f"best_hprtree: {best_hprtree}\nworst_hprtree: {worst_hprtree}\n\nbest_rsttree: {best_rsttree}\nworst_rsttree: {worst_rsttree}\n")
print_or_write(f"best_comp: {best_comp}\nworst_comp: {worst_comp}")
printcommit(f"{outdir}{sizesubdir}sizestats.txt")

In [None]:
# def plot(data, mean, std, ylabel, lineon=None):
#     plt.figure()
#     plt.scatter(range(len(data)), data, marker=".", s=0.8)
#     # plt.plot(data)
#     plt.ylabel(ylabel)
#     plt.xlabel("N=" + str(len(data)))
#     if lineon != None:
#         for line in lineon:
#             plt.axhline(
#                 line[0], color=line[1], linewidth=0.9, ls=(0, (1, 5))
#             ).set_label(line[2])
#     plt.axhline(mean, color="red", linewidth=0.7, ls="--").set_label("mean")
#     plt.axhline(mean + std, color="purple", linewidth=0.7).set_label(
#         "mean±standard deviation"
#     )
#     plt.axhline(mean - std, color="purple", linewidth=0.7)
#     plt.axhspan(mean + std, mean - std, facecolor="purple", alpha=0.1)
#     plt.xticks([])
#     plt.legend()
#     plt.show()


# def plot_sorted(data, mean, std, ylabel, lineon=None):
#     plt.figure()
#     plt.plot(data.sort_values(by=[data.columns[0]], ascending=True, ignore_index=True))
#     plt.ylabel(ylabel)
#     plt.xlabel("CDF\nN=" + str(len(data)))
#     if lineon != None:
#         for line in lineon:
#             plt.axhline(
#                 line[0], color=line[1], linewidth=0.9, ls=(0, (1, 5))
#             ).set_label(line[2])
#     plt.axhline(mean, color="red", linewidth=0.7, ls="--").set_label("mean")
#     plt.axhline(mean + std, color="purple", linewidth=0.7).set_label(
#         "mean±standard deviation"
#     )
#     plt.axhline(mean - std, color="purple", linewidth=0.7)
#     plt.axhspan(mean + std, mean - std, facecolor="purple", alpha=0.1)
#     plt.xticks(
#         [0, len(data) / 4, len(data) / 2, len(data) / (4 / 3), len(data)],
#         ["0.0", "0.25", "0.5", "0.75", "1.0"],
#     )
#     plt.legend()
#     plt.show()

In [None]:
# detect_timings_ns = pd.read_csv('../stats/detect_timings.txt')
# copy_timings_ns = pd.read_csv('../stats/copy_timings.txt')
# startup_timings_ns = pd.read_csv('../stats/startup_timings.txt')
# crop_timings_ns = pd.read_csv('../stats/crop_timings.txt')

In [None]:
# detect_timings_us = detect_timings_ns / 1000
# detect_timings_ms = detect_timings_us / 1000
# detect_timings_mean = detect_timings_ms.mean()[0]
# print("detect_timings_mean: " + str(detect_timings_mean))
# detect_timings_std = detect_timings_ms.std()[0]
# print("detect_timings_std: " + str(detect_timings_std))
# print("min: " + str(detect_timings_ms.min()[0]))
# print("max: " + str(detect_timings_ms.max()[0]))
# print("N: " + str(len(detect_timings_ns)))

# print("\n")

# copy_timings_us = copy_timings_ns / 1000
# copy_timings_mean = copy_timings_us.mean()[0]
# print("copy_timings_mean: " + str(copy_timings_mean))
# copy_timings_std = copy_timings_us.std()[0]
# print("copy_timings_std: " + str(copy_timings_std))
# print("min: " + str(copy_timings_us.min()[0]))
# print("max: " + str(copy_timings_us.max()[0]))
# print("N: " + str(len(copy_timings_ns)))

# print("\n")

# startup_timings_us = startup_timings_ns / 1000
# startup_timings_ms = startup_timings_us / 1000
# startup_timings_mean = startup_timings_ms.mean()[0]
# print("startup_timings_mean: " + str(startup_timings_mean))
# startup_timings_std = startup_timings_ms.std()[0]
# print("startup_timings_std: " + str(startup_timings_std))
# print("min: " + str(startup_timings_ms.min()[0]))
# print("max: " + str(startup_timings_ms.max()[0]))
# print("N: " + str(len(startup_timings_ns)))

# print("\n")

# crop_timings_us = crop_timings_ns / 1000
# crop_timings_ms = crop_timings_us / 1000
# crop_timings_mean = crop_timings_ms.mean()[0]
# print("crop_timings_mean: " + str(crop_timings_mean))
# crop_timings_std = crop_timings_ms.std()[0]
# print("crop_timings_std: " + str(crop_timings_std))
# print("min: " + str(crop_timings_ms.min()[0]))
# print("max: " + str(crop_timings_ms.max()[0]))
# print("N: " + str(len(crop_timings_ns)))

In [None]:
# plot(detect_timings_ms, detect_timings_mean, detect_timings_std, 'Detection timings in ms')
# plot_sorted(detect_timings_ms, detect_timings_mean, detect_timings_std, 'Detection timings in ms')

In [None]:
# plot(copy_timings_us, copy_timings_mean, copy_timings_std, 'Copy timings in us')
# plot_sorted(copy_timings_us, copy_timings_mean, copy_timings_std, 'Copy timings in us')

In [None]:
# plot(startup_timings_ms, startup_timings_mean, startup_timings_std, 'Startup timings in ms')
# plot_sorted(startup_timings_ms, startup_timings_mean, startup_timings_std, 'Startup timings in ms')

In [None]:
# plot(crop_timings_ms, crop_timings_mean, crop_timings_std, 'Crop timings in ms', [[16.666, 'green', '60FPS capable'], [33.333, 'orange', '30FPS capable']])
# plot_sorted(crop_timings_ms, crop_timings_mean, crop_timings_std, 'Crop timings in ms', [[16.666, 'green', '60FPS capable'], [33.333, 'orange', '30FPS capable']])