In [32]:
import matplotlib
import pandas as pd
from matplotlib import pyplot as plt
# https://towardsdatascience.com/how-to-create-and-customize-venn-diagrams-in-python-263555527305
from matplotlib_venn import venn2, venn2_circles
from matplotlib_venn import venn3, venn3_circles

import numpy as np

from extract.jobs import parse_mallob, get_baseline
from extract.threads import parse_threads
from extract.threads_old import parse_threads_old
from extract.mallob import parse_dominik

# Select the PGF backend; the plotting cells save both .pdf and .pgf files.
matplotlib.use("pgf")

# rc settings for LaTeX-based text rendering, applied in a single update call.
pgf_rc_settings = {
    "pgf.texsystem": "pdflatex",
    "font.family": "serif",
    "text.usetex": True,
    "pgf.rcfonts": False,
}
matplotlib.rcParams.update(pgf_rc_settings)

In [2]:
# Close every figure pyplot currently tracks before the cells below create new ones.
plt.close("all")

In [3]:
# Parse the per-thread logs of the three dynamic benchmark runs.
# The plotting cells index each result with [0] and [1]; judging by their
# comments, [0] holds generator-thread records and [1] solver-thread records
# (TODO confirm against the parser).
# NOTE(review): dynamic_4 is read with parse_threads while dynamic_2 and
# dynamic_8 use parse_threads_old — presumably the log format differs between
# the runs; confirm that this is intended.
dynamic_2_pair = parse_threads_old("benchmark/dynamic_2/")
dynamic_4_pair = parse_threads("benchmark/dynamic_4/")
dynamic_8_pair = parse_threads_old("benchmark/dynamic_8/")

In [30]:
# Wait times:
# percentage of time the solver threads of a job had to wait, relative to
# their total run time. Either cubes are solved far too quickly or they take
# forever to be generated.
fig, ax = plt.subplots(figsize=(3, 2))
fig.subplots_adjust(bottom=0.2, left=0.175)

# (thread records, line color, line style) per curve; the order must match the
# legend labels further down.
wait_curves = [
    (dynamic_2_pair[1], "#377eb8", "-."),
    (dynamic_4_pair[1], "#ff7f00", ":"),
    (dynamic_8_pair[1], "#e41a1c", "--"),
]
for thread_records, line_color, line_style in wait_curves:
    # Aggregate only the columns used below; summing every column is
    # unnecessary and can fail on non-numeric columns.
    per_job = pd.DataFrame(thread_records).groupby("job")[["wait_time", "run_time"]].sum()
    wait_percent = per_job["wait_time"] / per_job["run_time"] * 100
    # NaN ratios (e.g. 0/0) are shown as 0 %. Jobs are sorted by their
    # percentage and re-indexed 0..n-1 so the x axis counts problems.
    (
        wait_percent.fillna(0)
        .sort_values()
        .reset_index(drop=True)
        .plot(color=line_color, style=line_style, ax=ax)
    )

ax.set_xlabel("problems")
ax.set_ylabel("time spent waiting in %")

ax.set_xticks([0, 100, 200, 300, 400])
ax.set_xlim(left=0, right=400)
ax.set_yticks([0, 25, 50, 75, 100])
ax.set_ylim(bottom=0, top=100)

ax.grid(axis="y")

ax.legend(['$J=2$', '$J=4$', '$J=8$'])

fig.savefig("dynamic_plots/wait.pdf")
fig.savefig("dynamic_plots/wait.pgf")

# Release the figure once it is written so open figures do not accumulate
# across cells.
plt.close(fig)

In [29]:
# Active time of the generator threads:
# percentage of a job's summed run time that was neither idle nor waiting.
fig, ax = plt.subplots(figsize=(3, 2))
fig.subplots_adjust(bottom=0.2, left=0.175)

# (thread records, line color, line style) per curve; the order must match the
# legend labels further down.
generator_curves = [
    (dynamic_2_pair[0], "#377eb8", "-."),
    (dynamic_4_pair[0], "#ff7f00", ":"),
    (dynamic_8_pair[0], "#e41a1c", "--"),
]
for thread_records, line_color, line_style in generator_curves:
    # Aggregate only the columns used below; summing every column is
    # unnecessary and can fail on non-numeric columns.
    per_job = (
        pd.DataFrame(thread_records)
        .groupby("job")[["run_time", "idle_time", "wait_time"]]
        .sum()
    )
    active_time = per_job["run_time"] - per_job["idle_time"] - per_job["wait_time"]
    active_percent = active_time / per_job["run_time"] * 100
    # NaN ratios (e.g. 0/0) are shown as 0 %. Jobs are sorted by their
    # percentage and re-indexed 0..n-1 so the x axis counts problems.
    (
        active_percent.fillna(0)
        .sort_values()
        .reset_index(drop=True)
        .plot(color=line_color, style=line_style, ax=ax)
    )

ax.set_xlabel("problems")
ax.set_ylabel("time spent generating in %")

ax.set_xticks([0, 100, 200, 300, 400])
ax.set_xlim(left=0, right=400)
ax.set_yticks([0, 25, 50, 75, 100])
ax.set_ylim(bottom=0, top=100)

ax.grid(axis="y")

ax.legend(['$J=2$', '$J=4$', '$J=8$'])

fig.savefig("dynamic_plots/generate.pdf")
fig.savefig("dynamic_plots/generate.pgf")

# Release the figure once it is written so open figures do not accumulate
# across cells.
plt.close(fig)

  fig = plt.figure()


In [6]:
# Per-job results of the three dynamic benchmark runs, one DataFrame per run.
# Later cells read the columns "identifier", "result", "duration",
# "size_of_added_buffer" and "size_of_used_cube" from these frames.
dynamic_2_results_df = pd.DataFrame(parse_mallob("benchmark/dynamic_2"))
dynamic_4_results_df = pd.DataFrame(parse_mallob("benchmark/dynamic_4"))
dynamic_8_results_df = pd.DataFrame(parse_mallob("benchmark/dynamic_8"))

In [12]:
# Baseline results; plotted as the 'Cadical' curve in the time plots, which
# read its "result" and "duration" columns.
baseline_df = pd.DataFrame(get_baseline("benchmark/baseline.txt"))

In [7]:
# One "result" column per configuration, indexed by problem identifier and
# named after the configuration.
two, four, eight = (
    results_df.set_index("identifier")["result"].rename(column_name)
    for column_name, results_df in (
        ("two", dynamic_2_results_df),
        ("four", dynamic_4_results_df),
        ("eight", dynamic_8_results_df),
    )
)

# Outer join on the identifier; a problem missing from a run counts as UNKNOWN.
result = pd.concat([two, four, eight], axis=1, join="outer").fillna("UNKNOWN")

# Number of problems for every observed combination of the three outcomes.
result.groupby(["two", "four", "eight"]).size().reset_index(name="count")

Unnamed: 0,two,four,eight,count
0,SAT,SAT,SAT,109
1,SAT,SAT,UNKNOWN,12
2,SAT,UNKNOWN,SAT,3
3,SAT,UNKNOWN,UNKNOWN,5
4,UNKNOWN,SAT,SAT,2
5,UNKNOWN,SAT,UNKNOWN,6
6,UNKNOWN,UNKNOWN,SAT,2
7,UNKNOWN,UNKNOWN,UNKNOWN,175
8,UNKNOWN,UNKNOWN,UNSAT,1
9,UNKNOWN,UNSAT,UNSAT,2


In [8]:
fig = plt.figure()

# Overlap of the solved instances (do the configurations solve the same instances?)
# TODO: look at the default colors and then adopt the matching ones

# Hard-coded region sizes with A = J=2, B = J=4, C = J=8.
# NOTE(review): each sum presumably combines two separately tallied result
# groups (e.g. SAT + UNSAT) — the values must be recomputed by hand whenever
# the benchmark data change; confirm against the current result table.
only_2 = 5 + 3
only_4 = 6
both_2_and_4 = 12 + 4
only_8 = 2 + 1
both_2_and_8 = 3
both_4_and_8 = 2 + 2
all_three = 109 + 75

# venn3 expects the regions in the order (Abc, aBc, ABc, abC, AbC, aBC, ABC).
venn3(
    subsets=(only_2, only_4, both_2_and_4, only_8, both_2_and_8, both_4_and_8, all_three),
    set_labels=('$J=2$', '$J=4$', '$J=8$'),
    set_colors=('#377eb8', '#ff7f00', '#e41a1c'),
)

fig.set_size_inches(w=3, h=2)

fig.savefig("dynamic_plots/venn.pdf")
fig.savefig("dynamic_plots/venn.pgf")

# Release the figure once it is written so open figures do not accumulate
# across cells.
plt.close(fig)

In [22]:
# Plot solved over time: for each configuration, the number of solved problems
# as a function of the run-time limit.
fig, ax = plt.subplots(figsize=(3, 2))
fig.subplots_adjust(bottom=0.2, left=0.175)

# (results frame, line color, line style) per curve; the order must match the
# legend labels further down.
time_curves = [
    (dynamic_2_results_df, "#377eb8", "-."),
    (dynamic_4_results_df, "#ff7f00", ":"),
    (dynamic_8_results_df, "#e41a1c", "--"),
    (baseline_df, "#f781bf", "-"),
]
for results_df, line_color, line_style in time_curves:
    # Durations of all solved (non-UNKNOWN) problems in ascending order,
    # re-indexed 0..n-1.
    solved_durations = (
        results_df.loc[results_df["result"] != "UNKNOWN", "duration"]
        .sort_values()
        .reset_index(drop=True)
    )
    # Map duration -> number of problems solved within that duration.
    # Series.items() replaces Series.iteritems(), which pandas 2.0 removed.
    # The count is rank + 1: the fastest problem means one problem solved,
    # not zero. Equal durations collapse onto the highest count.
    solved_within = pd.Series(
        {duration: rank + 1 for rank, duration in solved_durations.items()}
    )
    solved_within.plot(color=line_color, style=line_style, ax=ax)

ax.set_xlabel("run time in s")
# Raw string: "\#" and "\l" are not valid Python escape sequences.
ax.set_ylabel(r"\# solved problems in $\leq t$ s")

ax.set_xticks([0, 250, 500, 750, 1000])
ax.set_xlim(left=0, right=1000)
ax.set_yticks([0, 50, 100, 150, 200])
ax.set_ylim(bottom=0)

ax.grid(axis="y")

ax.legend(['$J=2$', '$J=4$', '$J=8$', 'Cadical'])

fig.savefig("dynamic_plots/time.pdf")
fig.savefig("dynamic_plots/time.pgf")

# Release the figure once it is written so open figures do not accumulate
# across cells.
plt.close(fig)

In [10]:
# For every configuration, count the solved (non-UNKNOWN) jobs whose
# "size_of_added_buffer" falls into each of two bins: up to 0.5, and above 0.5
# (up to 10000000).
for lbc, results_df in (
    (2, dynamic_2_results_df),
    (4, dynamic_4_results_df),
    (8, dynamic_8_results_df),
):
    print(f"lbc = {lbc}")
    solved_buffer_sizes = results_df.loc[
        results_df["result"] != "UNKNOWN", "size_of_added_buffer"
    ]
    binned = pd.cut(solved_buffer_sizes, bins=[0, 0.5, 10000000], include_lowest=True)
    print(binned.value_counts())

lbc = 2
(-0.001, 0.5]        142
(0.5, 10000000.0]     69
Name: size_of_added_buffer, dtype: int64
lbc = 4
(-0.001, 0.5]        157
(0.5, 10000000.0]     53
Name: size_of_added_buffer, dtype: int64
lbc = 8
(-0.001, 0.5]        157
(0.5, 10000000.0]     37
Name: size_of_added_buffer, dtype: int64


In [11]:
# For every configuration, count the solved (non-UNKNOWN) jobs whose
# "size_of_used_cube" falls into each of two bins: up to 0.5, and above 0.5
# (up to 10000000).
for lbc, results_df in (
    (2, dynamic_2_results_df),
    (4, dynamic_4_results_df),
    (8, dynamic_8_results_df),
):
    print(f"lbc = {lbc}")
    solved_cube_sizes = results_df.loc[
        results_df["result"] != "UNKNOWN", "size_of_used_cube"
    ]
    binned = pd.cut(solved_cube_sizes, bins=[0, 0.5, 10000000], include_lowest=True)
    print(binned.value_counts())

lbc = 2
(0.5, 10000000.0]    129
(-0.001, 0.5]         82
Name: size_of_used_cube, dtype: int64
lbc = 4
(0.5, 10000000.0]    123
(-0.001, 0.5]         87
Name: size_of_used_cube, dtype: int64
lbc = 8
(-0.001, 0.5]        98
(0.5, 10000000.0]    96
Name: size_of_used_cube, dtype: int64


In [33]:
# Let us have a look in mallob
mallob_2_df = pd.DataFrame(parse_dominik("dominik/lbc=2/thread_logs"))
mallob_4_df = pd.DataFrame(parse_dominik("dominik/lbc=4/thread_logs"))
mallob_8_df = pd.DataFrame(parse_dominik("dominik/lbc=8/thread_logs"))

In [101]:
# Per-job results of the static benchmark runs, plotted below as 'Sta. $J=…$'.
# NOTE(review): the directory names differ in more than the J value
# (static_5_1_2_random, static_4_1_4_new, static_3_1_8_random) — presumably
# the chosen runs per configuration; confirm these are the intended ones.
static_2_df = pd.DataFrame(parse_mallob("benchmark/static_5_1_2_random"))
static_4_df = pd.DataFrame(parse_mallob("benchmark/static_4_1_4_new"))
static_8_df = pd.DataFrame(parse_mallob("benchmark/static_3_1_8_random"))

In [111]:
# Plot solved over time: static runs vs. mallob portfolio runs vs. baseline.
fig, ax = plt.subplots(figsize=(6, 3))
fig.subplots_adjust(bottom=0.15, left=0.1, right=0.75)

# (results frame, plot keyword arguments) per curve; the order must match the
# legend labels further down.
time_curves = [
    (static_2_df, dict(color="#377eb8", style="-.", marker="^", fillstyle="none", markevery=10)),
    (static_4_df, dict(color="#ff7f00", style=":", marker="s", fillstyle="none", markevery=10)),
    (static_8_df, dict(color="#e41a1c", style="--", marker="o", fillstyle="none", markevery=10)),
    (mallob_2_df, dict(color="#a65628", style="-.", marker="+", fillstyle="none", markevery=10)),
    (mallob_4_df, dict(color="#4daf4a", style=":", marker="x", fillstyle="none", markevery=10)),
    (mallob_8_df, dict(color="#984ea3", style="--", marker="*", fillstyle="none", markevery=10)),
    (baseline_df, dict(color="#f781bf", style="-")),
]
for results_df, plot_kwargs in time_curves:
    # Durations of all solved (non-UNKNOWN) problems in ascending order,
    # re-indexed 0..n-1.
    solved_durations = (
        results_df.loc[results_df["result"] != "UNKNOWN", "duration"]
        .sort_values()
        .reset_index(drop=True)
    )
    # Map duration -> number of problems solved within that duration.
    # Series.items() replaces Series.iteritems(), which pandas 2.0 removed.
    # The count is rank + 1: the fastest problem means one problem solved,
    # not zero. Equal durations collapse onto the highest count.
    solved_within = pd.Series(
        {duration: rank + 1 for rank, duration in solved_durations.items()}
    )
    solved_within.plot(ax=ax, **plot_kwargs)

ax.set_xlabel("run time in s")
# Raw string: "\#" and "\l" are not valid Python escape sequences.
ax.set_ylabel(r"\# solved problems in $\leq t$ s")

ax.set_xticks([0, 250, 500, 750, 1000])
ax.set_xlim(left=0, right=1000)
ax.set_yticks([0, 50, 100, 150, 200, 250])
ax.set_ylim(bottom=0)

ax.grid(axis="y")

# The legend sits to the right of the axes, in the space reserved by right=0.75.
ax.legend(['Sta. $J=2$', 'Sta. $J=4$', 'Sta. $J=8$', 'Port. $J=2$', 'Port. $J=4$', 'Port. $J=8$', 'Cadical'], loc='center left', bbox_to_anchor=(1.025, 0.5))

fig.savefig("final_plots/static_time.pdf")
fig.savefig("final_plots/static_time.pgf")

# Release the figure once it is written so open figures do not accumulate
# across cells.
plt.close(fig)

In [112]:
# Plot solved over time: dynamic runs vs. mallob portfolio runs vs. baseline.
fig, ax = plt.subplots(figsize=(6, 3))
fig.subplots_adjust(bottom=0.15, left=0.1, right=0.75)

# (results frame, plot keyword arguments) per curve; the order must match the
# legend labels further down.
time_curves = [
    (dynamic_2_results_df, dict(color="#377eb8", style="-.", marker="^", fillstyle="none", markevery=10)),
    (dynamic_4_results_df, dict(color="#ff7f00", style=":", marker="s", fillstyle="none", markevery=10)),
    (dynamic_8_results_df, dict(color="#e41a1c", style="--", marker="o", fillstyle="none", markevery=10)),
    (mallob_2_df, dict(color="#a65628", style="-.", marker="+", fillstyle="none", markevery=10)),
    (mallob_4_df, dict(color="#4daf4a", style=":", marker="x", fillstyle="none", markevery=10)),
    (mallob_8_df, dict(color="#984ea3", style="--", marker="*", fillstyle="none", markevery=10)),
    (baseline_df, dict(color="#f781bf", style="-")),
]
for results_df, plot_kwargs in time_curves:
    # Durations of all solved (non-UNKNOWN) problems in ascending order,
    # re-indexed 0..n-1.
    solved_durations = (
        results_df.loc[results_df["result"] != "UNKNOWN", "duration"]
        .sort_values()
        .reset_index(drop=True)
    )
    # Map duration -> number of problems solved within that duration.
    # Series.items() replaces Series.iteritems(), which pandas 2.0 removed.
    # The count is rank + 1: the fastest problem means one problem solved,
    # not zero. Equal durations collapse onto the highest count.
    solved_within = pd.Series(
        {duration: rank + 1 for rank, duration in solved_durations.items()}
    )
    solved_within.plot(ax=ax, **plot_kwargs)

ax.set_xlabel("run time in s")
# Raw string: "\#" and "\l" are not valid Python escape sequences.
ax.set_ylabel(r"\# solved problems in $\leq t$ s")

ax.set_xticks([0, 250, 500, 750, 1000])
ax.set_xlim(left=0, right=1000)
ax.set_yticks([0, 50, 100, 150, 200, 250])
ax.set_ylim(bottom=0)

ax.grid(axis="y")

# The legend sits to the right of the axes, in the space reserved by right=0.75.
ax.legend(['Dyn. $J=2$', 'Dyn. $J=4$', 'Dyn. $J=8$', 'Port. $J=2$', 'Port. $J=4$', 'Port. $J=8$', 'Cadical'], loc='center left', bbox_to_anchor=(1.025, 0.5))

fig.savefig("final_plots/dynamic_time.pdf")
fig.savefig("final_plots/dynamic_time.pgf")

# Release the figure once it is written so open figures do not accumulate
# across cells.
plt.close(fig)

In [45]:
# Display the parsed lbc=8 mallob results for inspection.
mallob_8_df

Unnamed: 0,identifier,start_time,end_time,duration,result
0,1,2161.030,3161.073,1000.043,UNKNOWN
1,2,0.561,1000.566,1000.005,UNKNOWN
2,3,0.547,1000.554,1000.007,UNKNOWN
3,4,161.006,1161.012,1000.006,UNKNOWN
4,5,470.355,1470.361,1000.006,UNKNOWN
...,...,...,...,...,...
394,395,30059.498,31059.508,1000.010,UNKNOWN
395,396,30128.484,30172.246,43.762,SAT
396,397,30168.262,30232.283,64.021,SAT
397,398,30173.816,30201.594,27.778,SAT
