In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import subprocess
import os
from collections import defaultdict
from datetime import datetime as dt
from collections import Counter
from math import log

In [None]:
# Root directory holding one sub-directory per benchmark dataset.
DATA_DIR = "performance"
# os.listdir() returns entries in arbitrary order, so sort them to make the
# benchmark order reproducible. Keep directories only: every dataset is
# expanded with os.listdir() when the benchmark runs, which would fail on a
# stray regular file whose name happens to start with "test".
DATASETS = sorted(
    name
    for name in os.listdir(DATA_DIR)
    if name.startswith("test") and os.path.isdir(os.path.join(DATA_DIR, name))
)
print(len(DATASETS))
print(DATASETS[:10])

In [None]:
# Parallel result columns: index i of every list below describes the same
# solver run. Only runs whose output could be parsed are recorded here.
arr_sizes = []
ans_fracts = []
max_vals = []
answers = []
found = []
runtimes = []
densities = []
# Command lines that exited with a non-zero status or printed output that
# could not be parsed.
failed_cmds = []


def _read_answer(path):
    """Return the integer stored as the last token of the first line of *path*.

    Raises IndexError if the first line is empty and ValueError if the token
    is not an integer.
    """
    with open(path, "r") as f:
        # Only the first line is needed, so do not read the whole file.
        return int(f.readline().split()[-1])


def _run_solver(argv):
    """Run the solver command *argv* and return ``(found, density, runtime)``.

    *argv* is passed to subprocess as an argument list (no shell), so file
    names containing spaces or shell metacharacters are handled correctly.

    ``found`` is False when the last output line contains "None", True
    otherwise. ``density`` is the last token of the second output line.
    ``runtime`` is a timedelta covering the subprocess call only.

    Raises subprocess.CalledProcessError on a non-zero exit status, and
    IndexError / ValueError when the output does not have the expected shape.
    """
    t_0 = dt.now()
    raw_out = subprocess.check_output(argv)
    runtime = dt.now() - t_0
    # Drop trailing whitespace before splitting: otherwise the newline that
    # terminates the output leaves an empty last element and the "None"
    # check below can never match.
    cmd_out = raw_out.decode("utf-8").rstrip().split("\n")
    res = "None" not in cmd_out[-1]
    density = float(cmd_out[1].split()[-1])
    return res, density, runtime


datasets_num = len(DATASETS)
for num, dataset in enumerate(DATASETS):
    dt_0 = dt.now()
    dataset_path = os.path.join(DATA_DIR, dataset)
    # Fields 1-3 of the underscore-separated directory name are the array
    # size, ans_fract and max value of the dataset.
    dataset_info = dataset.split("_")
    arr_size = int(dataset_info[1])
    ans_fract = int(dataset_info[2])
    max_val = int(dataset_info[3])
    for dfile in os.listdir(dataset_path):
        dfile_path = os.path.join(dataset_path, dfile)
        answer = _read_answer(dfile_path)
        # TODO: smarter output parsing
        argv = ["./SSP.py", dfile_path, str(answer), "-d", "--gd"]
        # Human-readable form, kept only for the failure report.
        cmd = " ".join(argv)
        try:
            res, density, runtime = _run_solver(argv)
        except (subprocess.CalledProcessError, IndexError, ValueError):
            # One bad run must not abort the whole benchmark; remember the
            # command so it can be inspected afterwards.
            failed_cmds.append(cmd)
            continue
        arr_sizes.append(arr_size)
        ans_fracts.append(ans_fract)
        max_vals.append(max_val)
        answers.append(answer)
        found.append(res)
        runtimes.append(runtime)
        densities.append(density)
    print(f"Dataset {num + 1} / {datasets_num} done in {dt.now() - dt_0}")

In [None]:
# Convert every measured timedelta to float seconds. total_seconds() also
# covers durations of a day or more, whose str() form ("1 day, 0:00:01")
# cannot be parsed by splitting on ":".
runtimes_sec = [rt.total_seconds() for rt in runtimes]

In [None]:
# List the recorded failed commands, one per line.
print(*failed_cmds, sep="\n")

In [None]:
# Dump the collected columns to a tab-separated file, one run per row.
# The context manager closes the file even if a write fails part-way.
# Note: the "runtime" column holds str(timedelta) (e.g. "0:00:00.123456"),
# not seconds.
with open("results.tsv", "w") as results_file:
    results_file.write("arr_size\tans_fract\tmaxval\tanswer\tfound\truntime\tdensity\n")
    for row in zip(arr_sizes, ans_fracts, max_vals, answers, found, runtimes, densities):
        results_file.write("\t".join(map(str, row)) + "\n")

In [None]:
# Distribution of the density values collected from the solver runs.
density_fig, density_ax = plt.subplots()
density_ax.hist(densities, bins=100)
density_ax.set_xlabel("Density")
plt.show()

# Report the observed range below the figure.
lowest_density = min(densities)
highest_density = max(densities)
print(f"Min density: {lowest_density}; max: {highest_density}")

In [None]:
# Tally the True / False values recorded in `found`.
found_counts = Counter(found)
print(found_counts)

In [None]:
# Runtime against array size for all recorded runs; y axis limited to 0-20 s.
fig, ax_1 = plt.subplots(figsize=(15, 10))
ax_1.scatter(arr_sizes, runtimes_sec, color="blue", alpha=0.25)
ax_1.set(xlabel="array size", ylabel="runtime, sec", ylim=(0, 20))
ax_1.grid()
plt.show()

In [None]:
# Same scatter as the full-range view, zoomed to the 0.05-0.2 s band.
fig, ax_1 = plt.subplots(figsize=(15, 10))
ax_1.scatter(arr_sizes, runtimes_sec, color="blue", alpha=0.25)
ax_1.set(xlabel="array size", ylabel="runtime, sec", ylim=(0.05, 0.2))
ax_1.grid()

plt.show()

In [None]:
# Runs slower than 0.2 s, kept as (array size, runtime in seconds) pairs.
longs = [
    (size, secs)
    for size, secs in zip(arr_sizes, runtimes_sec)
    if secs > 0.2
]
# Split the pairs back into separate columns for plotting.
long_rt = [secs for _size, secs in longs]
long_as = [size for size, _secs in longs]
print(len(longs))

In [None]:
def penta(x):
    """Reference curve x ** 3 / 50000000 (cubic, despite the name)."""
    return x ** 3 / 50000000


# Sample points for the reference curve.
X = list(range(1, 2000, 10))
Y = list(map(penta, X))

# Slow runs (long_as / long_rt) with the cubic reference curve drawn on top.
fig, ax_1 = plt.subplots(figsize=(15, 10))
ax_1.scatter(long_as, long_rt, color="blue", alpha=0.25, label="worst cases")
ax_1.set(
    xlabel="array size",
    ylabel="runtime, sec",
    xlim=(0, 2000),
    ylim=(0, 150),
)
ax_1.grid()
ax_1.plot(X, Y, color="red", label="x^3 / C")
ax_1.legend()
plt.show()