In [1]:
import matplotlib
import pandas as pd
from matplotlib import pyplot as plt
# https://towardsdatascience.com/how-to-create-and-customize-venn-diagrams-in-python-263555527305
from matplotlib_venn import venn2, venn2_circles
from matplotlib_venn import venn3, venn3_circles

import numpy as np

from extract.jobs import parse_mallob, get_baseline
from extract.threads import parse_threads
from extract.threads_old import parse_threads_old

# Switch matplotlib to the PGF backend so that the figures saved below are
# written as .pgf files.
matplotlib.use("pgf")

# Backend/font settings applied globally to every figure in this notebook.
PGF_RC_PARAMS = {
    "pgf.texsystem": "pdflatex",
    "font.family": "serif",
    "text.usetex": True,
    "pgf.rcfonts": False,
}
matplotlib.rcParams.update(PGF_RC_PARAMS)

In [2]:
# Close every figure that is still open before any new figure is created.
plt.close("all")

In [3]:
# Parse the per-thread logs of the three benchmark runs (J = 2, 4, 8).
# Later cells index each result with [0] and [1].
# NOTE(review): the J=4 run is read with `parse_threads`, the other two with
# `parse_threads_old` — presumably the log format differs between runs;
# confirm this mix is intentional.
dynamic_2_pair, dynamic_4_pair, dynamic_8_pair = (
    parser(f"benchmark/dynamic_{jobs}/")
    for jobs, parser in (
        (2, parse_threads_old),
        (4, parse_threads),
        (8, parse_threads_old),
    )
)

In [19]:
# Waiting times.
# Percentage of their total run time that the solver threads of a job had to
# wait. Either cubes are solved far too quickly or they take forever to be
# generated.
fig, ax = plt.subplots(figsize=(3, 2))
fig.subplots_adjust(bottom=0.2, left=0.175)

# One curve per configuration; element [1] of each pair is taken to hold the
# solver-thread records (per the note above) — TODO confirm against the parser.
for pair, color in (
    (dynamic_2_pair, "r"),
    (dynamic_4_pair, "g"),
    (dynamic_8_pair, "b"),
):
    # Sum only the columns that are actually used: a bare groupby(...).sum()
    # also tries to add up every other column, which for non-numeric columns
    # is wasteful or fails on pandas >= 2.0 (numeric_only defaults to False).
    totals = pd.DataFrame(pair[1]).groupby("job")[["wait_time", "run_time"]].sum()
    wait_share = totals["wait_time"] / totals["run_time"] * 100

    # 0/0 yields NaN and is counted as "no waiting". Sorting and dropping the
    # job index turns the values into one ascending curve over the problems.
    wait_share.fillna(0).sort_values().reset_index(drop=True).plot(color=color, ax=ax)

ax.set_yticks([0, 50, 100])

ax.set_xlabel("problems")
ax.set_ylabel("time spent waiting in %")

# Labels are matched to the curves by plotting order.
ax.legend(['$J=2$', '$J=4$', '$J=8$'])

fig.savefig("dynamic_plots/wait.pgf")

In [18]:
# Active time of the generator threads: percentage of the total run time that
# was neither idle nor waiting.
fig, ax = plt.subplots(figsize=(3, 2))
fig.subplots_adjust(bottom=0.2, left=0.175)

# One curve per configuration; element [0] of each pair is taken to hold the
# generator-thread records (per the note above) — TODO confirm against the parser.
for pair, color in (
    (dynamic_2_pair, "r"),
    (dynamic_4_pair, "g"),
    (dynamic_8_pair, "b"),
):
    # Sum only the columns that are actually used: a bare groupby(...).sum()
    # also tries to add up every other column, which for non-numeric columns
    # is wasteful or fails on pandas >= 2.0 (numeric_only defaults to False).
    totals = (
        pd.DataFrame(pair[0])
        .groupby("job")[["run_time", "idle_time", "wait_time"]]
        .sum()
    )
    active_time = totals["run_time"] - totals["idle_time"] - totals["wait_time"]
    active_share = active_time / totals["run_time"] * 100

    # 0/0 yields NaN and is counted as "no activity". Sorting and dropping the
    # job index turns the values into one ascending curve over the problems.
    active_share.fillna(0).sort_values().reset_index(drop=True).plot(color=color, ax=ax)

ax.set_yticks([0, 50, 100])

ax.set_xlabel("problems")
ax.set_ylabel("time spent generating in %")

# Labels are matched to the curves by plotting order.
ax.legend(['$J=2$', '$J=4$', '$J=8$'])

fig.savefig("dynamic_plots/generate.pgf")

In [7]:
# Load the per-instance results of the three benchmark runs (J = 2, 4, 8),
# one DataFrame per run, parsed in that order.
dynamic_2_results_df, dynamic_4_results_df, dynamic_8_results_df = (
    pd.DataFrame(parse_mallob(f"benchmark/dynamic_{jobs}")) for jobs in (2, 4, 8)
)

In [11]:
# Per-run result column, indexed by instance identifier and named after the run.
two, four, eight = (
    frame.set_index("identifier")["result"].rename(label)
    for label, frame in (
        ("two", dynamic_2_results_df),
        ("four", dynamic_4_results_df),
        ("eight", dynamic_8_results_df),
    )
)

# Outer join on the identifier; an instance missing from a run is treated as
# "UNKNOWN" for that run.
result = pd.concat([two, four, eight], axis=1, join="outer").fillna("UNKNOWN")

# Number of instances for every observed combination of the three outcomes.
result.groupby(["two", "four", "eight"]).size().reset_index(name="count")

Unnamed: 0,two,four,eight,count
0,SAT,SAT,SAT,109
1,SAT,SAT,UNKNOWN,12
2,SAT,UNKNOWN,SAT,3
3,SAT,UNKNOWN,UNKNOWN,5
4,UNKNOWN,SAT,SAT,2
5,UNKNOWN,SAT,UNKNOWN,6
6,UNKNOWN,UNKNOWN,SAT,2
7,UNKNOWN,UNKNOWN,UNKNOWN,175
8,UNKNOWN,UNKNOWN,UNSAT,1
9,UNKNOWN,UNSAT,UNSAT,2


In [16]:
fig = plt.figure()

# Overlap of the solved instances: do the three configurations solve the same
# instances? An instance counts as solved by a run when its result there is
# anything other than "UNKNOWN".
# TODO: look at the default colours and adopt matching ones.
# `result` is the per-identifier outcome table built in the preceding cell.
solved = result != "UNKNOWN"
in_two, in_four, in_eight = solved["two"], solved["four"], solved["eight"]

# Region sizes in the order venn3 expects, (Abc, aBc, ABc, abC, AbC, aBC, ABC),
# with A: J=2, B: J=4, C: J=8. They are derived from the data instead of the
# previously hand-copied (5 + 3, 6, 12 + 4, 2 + 1, 3, 2 + 2, 109 + 75), so the
# diagram cannot go stale when the benchmarks are re-run.
region_sizes = (
    int((in_two & ~in_four & ~in_eight).sum()),
    int((~in_two & in_four & ~in_eight).sum()),
    int((in_two & in_four & ~in_eight).sum()),
    int((~in_two & ~in_four & in_eight).sum()),
    int((in_two & ~in_four & in_eight).sum()),
    int((~in_two & in_four & in_eight).sum()),
    int((in_two & in_four & in_eight).sum()),
)
venn3(
    subsets=region_sizes,
    set_labels=('$J=2$', '$J=4$', '$J=8$'),
    set_colors=('r', 'g', 'b'),
)

fig.set_size_inches(w=3, h=2)
fig.savefig("dynamic_plots/venn.pgf")

In [17]:
# Plot the number of solved problems over time (one curve per configuration).
fig, ax = plt.subplots(figsize=(3, 2))
fig.subplots_adjust(bottom=0.2, left=0.175)

for results_df, color in (
    (dynamic_2_results_df, "r"),
    (dynamic_4_results_df, "g"),
    (dynamic_8_results_df, "b"),
):
    # Run times of all solved instances (anything but "UNKNOWN"), ascending.
    durations = results_df.loc[
        results_df["result"] != "UNKNOWN", "duration"
    ].sort_values()

    # Map each run time t to the number of instances solved within t.
    # Counting starts at 1: the fastest instance alone already means one
    # problem is solved (the former 0-based index under-reported by one).
    # Equal durations collapse onto the highest count, which is the correct
    # value for "solved in <= t". Built without Series.iteritems(), which was
    # removed in pandas 2.0.
    solved_by_time = pd.Series(
        {duration: count for count, duration in enumerate(durations, start=1)}
    )
    solved_by_time.plot(color=color, ax=ax)

ax.set_xlabel("run time in s")
# Raw string: "\#" and "\l" are LaTeX, not Python escape sequences.
ax.set_ylabel(r"\# solved problems in $\leq t$ s")

# Labels are matched to the curves by plotting order.
ax.legend(['$J=2$', '$J=4$', '$J=8$'])

fig.savefig("dynamic_plots/time.pgf")

In [13]:
# For every run, count the solved instances (result other than "UNKNOWN")
# whose `size_of_added_buffer` falls into [0, 0.5] versus (0.5, 10000000].
for jobs, results_df in (
    (2, dynamic_2_results_df),
    (4, dynamic_4_results_df),
    (8, dynamic_8_results_df),
):
    print(f"lbc = {jobs}")
    is_solved = results_df["result"] != "UNKNOWN"
    buffer_sizes = results_df.loc[is_solved, "size_of_added_buffer"]
    binned = pd.cut(buffer_sizes, bins=[0, 0.5, 10000000], include_lowest=True)
    print(binned.value_counts())

lbc = 2
(-0.001, 0.5]        142
(0.5, 10000000.0]     69
Name: size_of_added_buffer, dtype: int64
lbc = 4
(-0.001, 0.5]        157
(0.5, 10000000.0]     53
Name: size_of_added_buffer, dtype: int64
lbc = 8
(-0.001, 0.5]        157
(0.5, 10000000.0]     37
Name: size_of_added_buffer, dtype: int64


In [10]:
# For every run, count the solved instances (result other than "UNKNOWN")
# whose `size_of_used_cube` falls into [0, 0.5] versus (0.5, 10000000].
for jobs, results_df in (
    (2, dynamic_2_results_df),
    (4, dynamic_4_results_df),
    (8, dynamic_8_results_df),
):
    print(f"lbc = {jobs}")
    is_solved = results_df["result"] != "UNKNOWN"
    cube_sizes = results_df.loc[is_solved, "size_of_used_cube"]
    binned = pd.cut(cube_sizes, bins=[0, 0.5, 10000000], include_lowest=True)
    print(binned.value_counts())

lbc = 2
(0.5, 10000000.0]    129
(-0.001, 0.5]         82
Name: size_of_used_cube, dtype: int64
lbc = 4
(0.5, 10000000.0]    123
(-0.001, 0.5]         87
Name: size_of_used_cube, dtype: int64
lbc = 8
(-0.001, 0.5]        98
(0.5, 10000000.0]    96
Name: size_of_used_cube, dtype: int64
