In [None]:
# Evaluate the error of the answers produced by the CSP program.
# A negative error never happens.
%matplotlib inline
import matplotlib.pyplot as plt
import subprocess
import os
import random
import shutil
from collections import Counter
import statistics
from numpy import linspace

# Directory that holds one sub-directory of input files per dataset.
DATASETS_DIR = "../tests/input_files/"
# different sets for precision and performance tests
# Dataset names follow the pattern test_<str_len>_<str_num>_<answer>.
DATASETS = ['test_100_{}_{}'.format(str_num, answer)
            for str_num in (25, 50)
            for answer in (15, 25, 35, 50, 75)]

print(DATASETS)

In [None]:
# run and collect data
# Shell command template; get_answer() fills the two placeholders with the
# input file path and the `ans` value, in that order.
CMD_TEMPL = "../CSP {} {} -v 1"
# Collected results per dataset, keyed by the dataset's index in DATASETS.
dataset_data = {}


def get_answer(ans, filename, f=None):
    """Get the smallest program answer.

    Runs the command built from ``CMD_TEMPL`` for ``filename`` and ``ans``
    through the shell and parses its text output.

    Args:
        ans: Value passed to the program as its second argument (callers
            pass an int).
        filename: Path of the input file. It is shell-quoted, so paths that
            contain spaces or shell metacharacters reach the program intact.
        f: When truthy, the ``-f`` flag is appended to the command.

    Returns:
        ``ans`` if the third line from the end of the output is ``"True"``;
        otherwise ``ans`` plus the difference between the last and the
        third-from-last whitespace-separated tokens of the fifth line from
        the end of the output.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    # Local import keeps this definition self-contained within its cell.
    import shlex

    # The command goes through the shell, so the path must be quoted.
    cmd = CMD_TEMPL.format(shlex.quote(filename), ans)
    if f:
        cmd += " -f"
    csp_out = subprocess.check_output(cmd, shell=True).decode("utf-8").split("\n")
    # Lines are addressed from the end of the output; presumably the program
    # ends with a fixed trailer -- TODO confirm against its output format.
    if csp_out[-3] == "True":
        return ans
    max_cov_line = csp_out[-5].split()
    exp = int(max_cov_line[-1])
    real = int(max_cov_line[-3])
    return ans + (exp - real)


datasets_num = len(DATASETS)
for num, dataset in enumerate(DATASETS):
    print("Dataset {} / {} in progress".format(num + 1, datasets_num))
    # Dataset names look like "test_<str_len>_<str_num>_<answer>".
    dataset_metadata = dataset.split("_")
    str_len, str_num, answer = (int(field) for field in dataset_metadata[1:4])
    dataset_data[num] = {
        "answers": [],
        "answers_f": [],
        "data": {"str_len": str_len, "str_num": str_num, "answer": answer},
    }
    results = dataset_data[num]
    dataset_dir = os.path.join(DATASETS_DIR, dataset)
    # Every input file is processed twice: without and with the -f flag.
    for fle in os.listdir(dataset_dir):
        f_path = os.path.join(dataset_dir, fle)
        k = get_answer(answer, f_path)
        k_f = get_answer(answer, f_path, f=True)
        results["answers"].append(k)
        results["answers_f"].append(k_f)

In [None]:
# compare answers with and without F flag
# Flatten the per-dataset answer lists into two parallel sequences.
all_answers = [a for v in dataset_data.values() for a in v["answers"]]
all_f_answers = [a for v in dataset_data.values() for a in v["answers_f"]]

fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(all_answers, all_f_answers, s=3)
# Diagonal y = x over the current x-range: points on it received the same
# answer with and without the flag.
x = linspace(*ax.get_xlim())
ax.plot(x, x)
ax.set(xlabel="Answer without F flag", ylabel="Answer with F flag")
ax.grid()

In [None]:
# visualize the error
# Store, for every dataset, the signed difference between each collected
# answer and the dataset's reference "answer" value.
for v in dataset_data.values():
    expected = v["data"]["answer"]
    v["errors_f"] = [a - expected for a in v["answers_f"]]
    v["errors"] = [a - expected for a in v["answers"]]

fig = plt.figure(figsize=(20, 8))
to_plot = []
titles = []

perc_errors = []
perc_errors_f = []
answers = []

for v in dataset_data.values():
    ans = v["data"]["answer"]
    # One copy of the reference answer per recorded error.
    answers += [ans] * len(v["errors"])

    # Errors relative to the reference answer, in percent.
    perc_error = [e / ans * 100 for e in v["errors"]]
    perc_error_f = [e / ans * 100 for e in v["errors_f"]]
    perc_errors += perc_error
    perc_errors_f += perc_error_f

    # Two series per dataset: the plain run first, then the -f run.
    to_plot += [perc_error, perc_error_f]

    title = "{} {} {}".format(v["data"]["str_len"], v["data"]["str_num"], ans)
    titles += [title, title + "F"]
ax_1 = fig.add_subplot(1, 1, 1)

# One x position per violin, counted from 1.
pos = [i + 1 for i in range(len(titles))]
vp = ax_1.violinplot(to_plot, pos)
ax_1.set(title="Errors with and without -f flag", ylabel="Error %")

ax_1.set_xticks(pos)
ax_1.set_xticklabels(titles, rotation=45, fontsize=8)
ax_1.grid(axis="y")

# Both violins of a dataset share a colour; the palette repeats after five
# datasets.
palette = ["navy", "purple", "orange", "green", "pink"]
colors = [shade for shade in palette for _ in range(2)] * 2
for vb, color in zip(vp['bodies'], colors):
    vb.set_facecolor(color)
    vb.set_edgecolor("grey")
plt.show()