In [1]:
from ltlcross_wrapper import ResAnalyzer
import pandas as pd

In [2]:
from tools import tools, benchmark_names

Visualization of the cumulative data over all benchmarks

In [60]:
def gather_cumulative(benchmarks, tools=None, transpose=True, **kwargs):
    """Collect the cumulative values of all benchmarks into one styled table.

    Parameters
    ----------
    benchmarks : dict
        Maps a benchmark name to an analyzer object that provides
        ``cumulative(tool_set=..., highlight=False, **kwargs)``.
    tools : list of str, optional
        Forwarded as ``tool_set`` to every ``cumulative`` call.
    transpose : bool, default True
        If True the result has one column per benchmark, otherwise one
        row per benchmark.
    **kwargs
        Forwarded unchanged to ``cumulative`` (e.g. ``col="time"``).

    Returns
    -------
    pandas Styler
        The combined table with the smallest value of each benchmark
        highlighted in light green.
    """
    rows = []
    for name, b in benchmarks.items():
        tmp = pd.DataFrame(b.cumulative(tool_set=tools, highlight=False, **kwargs))
        # The cumulative result is expected to have a single column;
        # label it with the benchmark name.
        tmp.columns = [name]
        rows.append(tmp.transpose())
    # DataFrame.append no longer exists in pandas >= 2.0; concatenate once
    # instead of growing the frame inside the loop. pd.concat rejects an
    # empty list, so keep the original empty-frame result for no benchmarks.
    data = pd.concat(rows, sort=False) if rows else pd.DataFrame()
    if transpose:
        return data.transpose().style.apply(highlight_min, axis=0)
    return data.style.apply(highlight_min, axis=1)
    
def gather_mins(benchmarks, tools=None, transpose=True):
    """Collect the min-hit counts of all benchmarks into one styled table.

    Parameters
    ----------
    benchmarks : dict
        Maps a benchmark name to an analyzer object that provides
        ``min_counts(tool_set=...)`` returning a DataFrame.
    tools : list of str, optional
        Forwarded as ``tool_set`` to every ``min_counts`` call.
    transpose : bool, default True
        If True the ``(benchmark, measure)`` pairs are the columns,
        otherwise they are the rows.

    Returns
    -------
    pandas Styler
        The combined table (missing entries replaced by 0) with the largest
        value of each ``(benchmark, measure)`` pair highlighted in light green.
    """
    rows = []
    for name, b in benchmarks.items():
        counts = b.min_counts(tool_set=tools)
        per_benchmark = counts.transpose()
        # Label the transposed copy rather than overwriting the columns of
        # the frame returned by min_counts, so the analyzer's data is untouched.
        per_benchmark.index = pd.MultiIndex.from_tuples(
            [(name, c) for c in counts.columns]
        )
        rows.append(per_benchmark)
    # DataFrame.append no longer exists in pandas >= 2.0; concatenate once and
    # fill the gaps left by tools that are missing from some benchmark.
    data = pd.concat(rows, sort=False).fillna(0) if rows else pd.DataFrame()
    if transpose:
        return data.transpose().style.apply(highlight_max, axis=0)
    return data.style.apply(highlight_max, axis=1)

def highlight_min(s):
    """Return one CSS string per entry of ``s``, marking every minimum.

    Entries equal to ``s.min()`` get a light-green background (ties are all
    marked); every other entry gets an empty style. Intended as a callback
    for ``Styler.apply``.
    """
    lowest = s.min()
    styles = []
    for hit in (s == lowest):
        styles.append('background-color: lightgreen' if hit else '')
    return styles

def highlight_max(s):
    """Return one CSS string per entry of ``s``, marking every maximum.

    Entries equal to ``s.max()`` get a light-green background (ties are all
    marked); every other entry gets an empty style. Intended as a callback
    for ``Styler.apply``.
    """
    highest = s.max()
    styles = []
    for hit in (s == highest):
        styles.append('background-color: lightgreen' if hit else '')
    return styles

In [61]:
# Benchmarks to analyze, taken from the local `tools` module.
names = benchmark_names
# Tool configurations compared in the "yes.*" sections below.
# NOTE: this rebinds `tools`, replacing the object of the same name that an
# earlier cell imported from the `tools` module.
tools = ["yes.autfilt","yes.ncsb#best","yes.goal#fri","yes.goal#pit","yes.buechenic"]

In [62]:
# Build one ResAnalyzer per benchmark from data/<name>.csv, keyed by name.
benchmarks = {}
for name in names:
    b = ResAnalyzer(f"data/{name}.csv", tool_set=tools, cols=["states","time","acc","transitions","edges"])
    # Ad-hoc attributes attached to the analyzer for later reference.
    b.name = name
    # Number of entries in b.values before and after dropna()
    # (presumably rows with a missing result for some tool -- confirm).
    b.orig_count = len(b.values)
    b.clean_count = len(b.values.dropna())
    benchmarks[name] = b

In [63]:
# Cumulative value per tool (rows) and benchmark (columns). No `col` is
# passed, so ResAnalyzer.cumulative uses its own default measure.
# The smallest entry of each benchmark column is highlighted.
gather_cumulative(benchmarks)

Unnamed: 0_level_0,literature_det,literature_sd,literature_nd,random_det,random_sd,random_nd
tool,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
yes.autfilt,611,191,188,2439,2892,5492
yes.buechenic,1388,866,309,3638,5766,6330
yes.goal#fri,625,322,189,2490,3367,5305
yes.goal#pit,615,872,243,2451,3420,7645
yes.ncsb#best,622,240,186,2473,2828,5003


## Minimal automata
The following table shows, for each tool, the number of formulas for which it produces an automaton with the smallest number of states. The minimum ranges over the considered tools. The number in `min hits` shows how many times the tool matched the size of the smallest automaton. The number in `unique min hits` counts only the cases where the given tool is the only tool with such a small automaton.

In [64]:
# Min-hit counts per tool for every benchmark (tools=None is forwarded as
# tool_set); the largest count in each column is highlighted.
gather_mins(benchmarks)

Unnamed: 0_level_0,literature_det,literature_det,literature_sd,literature_sd,literature_nd,literature_nd,random_det,random_det,random_sd,random_sd,random_nd,random_nd
Unnamed: 0_level_1,unique min hits,min hits,unique min hits,min hits,unique min hits,min hits,unique min hits,min hits,unique min hits,min hits,unique min hits,min hits
yes.autfilt,6,150,10,40,3,10,8,489,36,354,43,196
yes.goal#fri,0,139,2,26,7,15,7,464,21,258,62,230
yes.goal#pit,0,145,0,31,0,6,0,476,3,262,8,91
yes.ncsb#best,0,142,4,37,1,9,0,465,56,417,101,295
yes.buechenic,0,0,0,0,0,0,0,6,1,4,45,55


### Time in seconds

In [65]:
# Same cumulative table for the "time" column; `col` is forwarded to
# ResAnalyzer.cumulative through **kwargs.
gather_cumulative(benchmarks, col="time")

Unnamed: 0_level_0,literature_det,literature_sd,literature_nd,random_det,random_sd,random_nd
tool,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
yes.autfilt,8.57435,2.5377,14.1334,25.9194,28.1235,155.879
yes.buechenic,980.031,239.804,672.446,1136.08,1240.11,3130.46
yes.goal#fri,862.555,420.565,332.866,2347.04,2483.33,3709.36
yes.goal#pit,791.033,478.867,334.117,2364.22,2570.98,4112.3
yes.ncsb#best,10.2817,2.85127,2.68155,29.3337,30.7912,384.036


## Closer look at benchmark by input automaton type

In [57]:
# For each input-automaton type, show the cumulative table of the matching
# benchmarks, followed by one cross-comparison table per benchmark.
for t in ["nd", "sd", "det"]:
    # Benchmark names carry the input type as a suffix (e.g. "random_nd"),
    # so select them with an explicit suffix test.
    bench = {n: b for n, b in benchmarks.items() if n.endswith(t)}
    display(t, gather_cumulative(bench, tools, transpose=True))
    for b in bench.values():
        display(b.cross_compare(tools, include_fails=True))
    # Blank lines to visually separate the groups in the output.
    print("\n\n\n")

'nd'

Unnamed: 0_level_0,literature_nd,random_nd
tool,Unnamed: 1_level_1,Unnamed: 2_level_1
yes.autfilt,188,5492
yes.buechenic,309,6330
yes.goal#fri,189,5305
yes.goal#pit,243,7645
yes.ncsb#best,186,5003


Unnamed: 0,yes.autfilt,yes.ncsb#best,yes.goal#fri,yes.goal#pit,yes.buechenic,V
yes.autfilt,,7.0,5.0,11.0,18.0,41
yes.ncsb#best,12.0,,6.0,15.0,18.0,51
yes.goal#fri,12.0,9.0,,15.0,19.0,55
yes.goal#pit,6.0,2.0,1.0,,16.0,25
yes.buechenic,2.0,2.0,1.0,4.0,,9


Unnamed: 0,yes.autfilt,yes.ncsb#best,yes.goal#fri,yes.goal#pit,yes.buechenic,V
yes.autfilt,,195.0,223.0,357.0,374.0,1149
yes.ncsb#best,253.0,,254.0,374.0,407.0,1288
yes.goal#fri,230.0,213.0,,368.0,391.0,1202
yes.goal#pit,100.0,86.0,88.0,,281.0,555
yes.buechenic,125.0,91.0,109.0,218.0,,543








'sd'

Unnamed: 0_level_0,literature_sd,random_sd
tool,Unnamed: 1_level_1,Unnamed: 2_level_1
yes.autfilt,191,2892
yes.buechenic,866,5766
yes.goal#fri,322,3367
yes.goal#pit,872,3420
yes.ncsb#best,240,2828


Unnamed: 0,yes.autfilt,yes.ncsb#best,yes.goal#fri,yes.goal#pit,yes.buechenic,V
yes.autfilt,,11.0,22.0,18.0,46.0,97
yes.ncsb#best,15.0,,24.0,12.0,47.0,98
yes.goal#fri,12.0,2.0,,10.0,44.0,68
yes.goal#pit,9.0,0.0,16.0,,41.0,66
yes.buechenic,3.0,2.0,4.0,8.0,,17


Unnamed: 0,yes.autfilt,yes.ncsb#best,yes.goal#fri,yes.goal#pit,yes.buechenic,V
yes.autfilt,,108.0,237.0,229.0,483.0,1057
yes.ncsb#best,142.0,,246.0,234.0,486.0,1108
yes.goal#fri,109.0,91.0,,151.0,473.0,824
yes.goal#pit,69.0,54.0,148.0,,451.0,722
yes.buechenic,13.0,14.0,24.0,48.0,,99








'det'

Unnamed: 0_level_0,literature_det,random_det
tool,Unnamed: 1_level_1,Unnamed: 2_level_1
yes.autfilt,611,2439
yes.buechenic,1388,3638
yes.goal#fri,625,2490
yes.goal#pit,615,2451
yes.ncsb#best,622,2473


Unnamed: 0,yes.autfilt,yes.ncsb#best,yes.goal#fri,yes.goal#pit,yes.buechenic,V
yes.autfilt,,14.0,13.0,10.0,152.0,189
yes.ncsb#best,14.0,,8.0,4.0,148.0,174
yes.goal#fri,15.0,13.0,,5.0,152.0,185
yes.goal#pit,15.0,11.0,6.0,,152.0,184
yes.buechenic,0.0,4.0,0.0,0.0,,4


Unnamed: 0,yes.autfilt,yes.ncsb#best,yes.goal#fri,yes.goal#pit,yes.buechenic,V
yes.autfilt,,64.0,40.0,42.0,494.0,640
yes.ncsb#best,9.0,,30.0,16.0,489.0,544
yes.goal#fri,20.0,57.0,,36.0,492.0,605
yes.goal#pit,11.0,48.0,21.0,,492.0,572
yes.buechenic,0.0,5.0,2.0,2.0,,9








## Without simplifications of Spot
GOAL#pit removes dead and unreachable states itself, and Buechenic probably does not create such states. The remaining tools do not remove them.

We can observe that Fribourg generates a large number of unnecessary states before simplifications.

In [67]:
# The same tool configurations with the leading "yes" swapped for "no".
# Only the prefix is rewritten, so a "yes" occurring later in a tool name
# is left intact (str.replace would rewrite every occurrence).
no_tools = ["no" + t[len("yes"):] if t.startswith("yes") else t for t in tools]

In [68]:
# Cumulative table restricted to the "no.*" tool configurations.
gather_cumulative(benchmarks, tools=no_tools)

Unnamed: 0_level_0,literature_det,literature_sd,literature_nd,random_det,random_sd,random_nd
tool,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
no.autfilt,613,257,205,2442,3578,7604
no.buechenic,1635,915,313,4710,6903,7627
no.goal#fri,1296,2518,2946,5059,17369,56448
no.goal#pit,634,513,336,2510,4990,13834
no.ncsb#best,804,439,562,3060,6446,20750


## Minimal automata
The following table shows, for each tool, the number of formulas for which it produces an automaton with the smallest number of states. The minimum ranges over the considered tools. The number in `min hits` shows how many times the tool matched the size of the smallest automaton. The number in `unique min hits` counts only the cases where the given tool is the only tool with such a small automaton.

In [69]:
# Min-hit counts where the minimum ranges over the "no.*" configurations only.
gather_mins(benchmarks, tools=no_tools)

Unnamed: 0_level_0,literature_det,literature_det,literature_sd,literature_sd,literature_nd,literature_nd,random_det,random_det,random_sd,random_sd,random_nd,random_nd
Unnamed: 0_level_1,unique min hits,min hits,unique min hits,min hits,unique min hits,min hits,unique min hits,min hits,unique min hits,min hits,unique min hits,min hits
no.autfilt,20,152,9,42,16,16,41,500,181,413,268,339
no.goal#pit,0,132,1,24,1,1,0,441,55,186,14,57
no.ncsb#best,0,11,2,20,1,1,0,68,17,153,24,56
no.buechenic,0,0,4,4,2,2,0,6,14,29,112,143
no.goal#fri,0,0,0,0,0,0,0,6,0,0,0,0


### Time in seconds

In [70]:
# Cumulative "time" column for the "no.*" tool configurations.
gather_cumulative(benchmarks, col="time", tools=no_tools)

Unnamed: 0_level_0,literature_det,literature_sd,literature_nd,random_det,random_sd,random_nd
tool,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
no.autfilt,7.94785,2.59661,1.14557,25.8379,27.3733,32.8517
no.buechenic,1005.34,234.268,700.125,1134.28,1222.84,3201.37
no.goal#fri,857.512,421.558,315.609,2364.8,2478.42,3531.84
no.goal#pit,856.772,506.556,353.231,2342.6,2569.2,3901.2
no.ncsb#best,1.75239,0.712357,0.857351,5.64361,29.1077,15.3791


In [58]:
# For each input-automaton type, show the cumulative table of the matching
# benchmarks for the "no.*" configurations, followed by one cross-comparison
# table per benchmark.
for t in ["nd", "sd", "det"]:
    # Benchmark names carry the input type as a suffix (e.g. "random_nd"),
    # so select them with an explicit suffix test.
    bench = {n: b for n, b in benchmarks.items() if n.endswith(t)}
    display(t, gather_cumulative(bench, no_tools, transpose=True))
    for b in bench.values():
        display(b.cross_compare(no_tools))
    # Blank lines to visually separate the groups in the output.
    print("\n\n\n")

'nd'

Unnamed: 0_level_0,literature_nd,random_nd
tool,Unnamed: 1_level_1,Unnamed: 2_level_1
no.autfilt,205,7604
no.buechenic,313,7627
no.goal#fri,2946,56448
no.goal#pit,336,13834
no.ncsb#best,562,20750


Unnamed: 0,no.autfilt,no.ncsb#best,no.goal#fri,no.goal#pit,no.buechenic,V
no.autfilt,,19.0,20.0,17.0,18.0,74
no.ncsb#best,1.0,,17.0,3.0,6.0,27
no.goal#fri,0.0,3.0,,0.0,5.0,8
no.goal#pit,3.0,17.0,19.0,,12.0,51
no.buechenic,2.0,14.0,15.0,8.0,,39


Unnamed: 0,no.autfilt,no.ncsb#best,no.goal#fri,no.goal#pit,no.buechenic,V
no.autfilt,,443.0,500.0,435.0,364.0,1742
no.ncsb#best,56.0,,477.0,227.0,212.0,972
no.goal#fri,0.0,23.0,,3.0,26.0,52
no.goal#pit,61.0,268.0,494.0,,238.0,1061
no.buechenic,135.0,288.0,474.0,261.0,,1158








'sd'

Unnamed: 0_level_0,literature_sd,random_sd
tool,Unnamed: 1_level_1,Unnamed: 2_level_1
no.autfilt,257,3578
no.buechenic,915,6903
no.goal#fri,2518,17369
no.goal#pit,513,4990
no.ncsb#best,439,6446


Unnamed: 0,no.autfilt,no.ncsb#best,no.goal#fri,no.goal#pit,no.buechenic,V
no.autfilt,,28.0,49.0,25.0,45.0,147
no.ncsb#best,8.0,,49.0,17.0,42.0,116
no.goal#fri,0.0,0.0,,1.0,25.0,26
no.goal#pit,10.0,22.0,47.0,,37.0,116
no.buechenic,4.0,7.0,23.0,11.0,,45


Unnamed: 0,no.autfilt,no.ncsb#best,no.goal#fri,no.goal#pit,no.buechenic,V
no.autfilt,,372.0,500.0,313.0,471.0,1656
no.ncsb#best,50.0,,500.0,204.0,369.0,1123
no.goal#fri,0.0,0.0,,1.0,108.0,109
no.goal#pit,86.0,244.0,499.0,,412.0,1241
no.buechenic,28.0,131.0,390.0,88.0,,637








'det'

Unnamed: 0_level_0,literature_det,random_det
tool,Unnamed: 1_level_1,Unnamed: 2_level_1
no.autfilt,613,2442
no.buechenic,1635,4710
no.goal#fri,1296,5059
no.goal#pit,634,2510
no.ncsb#best,804,3060


Unnamed: 0,no.autfilt,no.ncsb#best,no.goal#fri,no.goal#pit,no.buechenic,V
no.autfilt,,141.0,152.0,25.0,152.0,470
no.ncsb#best,0.0,,150.0,4.0,144.0,298
no.goal#fri,0.0,0.0,,1.0,90.0,91
no.goal#pit,0.0,137.0,151.0,,151.0,439
no.buechenic,0.0,7.0,61.0,0.0,,68


Unnamed: 0,no.autfilt,no.ncsb#best,no.goal#fri,no.goal#pit,no.buechenic,V
no.autfilt,,449.0,500.0,71.0,500.0,1520
no.ncsb#best,0.0,,490.0,36.0,477.0,1003
no.goal#fri,0.0,8.0,,0.0,208.0,216
no.goal#pit,0.0,414.0,492.0,,498.0,1404
no.buechenic,0.0,22.0,287.0,2.0,,311








## Errors
We can observe several timeouts (120s) and 2 parsing errors on the GOAL configurations. The parsing errors occur on the 2 formulas equivalent to false, for which GOAL does not see the automata as Büchi.

In [38]:
# Show the error-count table of every benchmark, preceded by its name.
# NOTE: the loop rebinds the notebook-level names `name` and `b`.
for name, b in benchmarks.items():
    display(name, b.get_error_counts())

'literature_det'

Unnamed: 0,timeout,parse error,incorrect,crash,no output
no.buechenic,4,0,0,0,0
no.goal#pit,1,0,0,0,0
yes.buechenic,4,0,0,0,0


'literature_sd'

Unnamed: 0,timeout,parse error,incorrect,crash,no output
no.buechenic,1,0,0,0,0
no.goal#fri,1,0,0,0,0
no.goal#pit,2,0,0,0,0
yes.buechenic,1,0,0,0,0
yes.goal#fri,1,0,0,0,0


'literature_nd'

Unnamed: 0,timeout,parse error,incorrect,crash,no output
no.buechenic,4,0,0,0,0
no.goal#fri,1,0,0,0,0
no.goal#pit,1,0,0,0,0
yes.buechenic,3,0,0,0,0
yes.goal#fri,1,0,0,0,0
yes.goal#pit,1,0,0,0,0


'random_det'

Unnamed: 0,timeout,parse error,incorrect,crash,no output
no.goal#fri,0,2,0,0,0
no.goal#pit,0,2,0,0,0
yes.goal#fri,0,2,0,0,0
yes.goal#pit,0,2,0,0,0


'random_sd'

Unnamed: 0_level_0,timeout,parse error,incorrect,crash,no output
tool,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


'random_nd'

Unnamed: 0,timeout,parse error,incorrect,crash,no output
no.buechenic,4,0,0,0,0
no.goal#fri,3,0,0,0,0
no.goal#pit,4,0,0,0,0
yes.autfilt,1,0,0,0,0
yes.buechenic,4,0,0,0,0
yes.goal#fri,4,0,0,0,0
yes.goal#pit,6,0,0,0,0
yes.ncsb#best,1,0,0,0,0
yes.ncsb#pldi,1,0,0,0,0
yes.ncsb#spot,1,0,0,0,0


In [16]:
# Analyzer used by the scatter plots below.
b = benchmarks["random_nd"]

# Short aliases for the tool configurations being plotted.
# NOTE(review): "yes.ncsb#pldi" and "yes.ncsb#spot" are not in the `tools`
# list passed to ResAnalyzer when `benchmarks` was built -- confirm that the
# analyzer still provides these columns on a fresh Restart & Run All.
frib = "yes.goal#fri"
pldi = "yes.ncsb#pldi"
ncsb = "yes.ncsb#spot"
spot = "yes.autfilt"
best = "yes.ncsb#best"

In [17]:
# Scatter plot comparing yes.goal#fri with yes.ncsb#best on the benchmark in `b`.
b.bokeh_scatter_plot(frib, best, alpha=.5)

In [18]:
# Scatter plot comparing the two NCSB variants yes.ncsb#pldi and yes.ncsb#spot.
b.bokeh_scatter_plot(pldi, ncsb)

In [19]:
# Scatter plot comparing yes.ncsb#best with yes.autfilt.
b.bokeh_scatter_plot(best, spot)

In [20]:
# Min-hit counts over the analyzer's default tool set: first with default
# arguments, then with unique_only=True.
display(b.min_counts(), b.min_counts(unique_only=True))

Unnamed: 0,0
no.autfilt,6
no.buechenic,23
yes.autfilt,83
yes.buechenic,37
yes.goal#fri,98
yes.goal#pit,37
yes.ncsb#best,160
yes.ncsb#pldi,94
yes.ncsb#spot,150


Unnamed: 0,0
yes.autfilt,24
yes.buechenic,13
yes.goal#fri,40
yes.goal#pit,1


In [19]:
# All "yes.*" configurations of the analyzer whose name mentions "ncsb".
ncsb_tools = [t for t in b.tools if t.startswith("yes") and "ncsb" in t]

In [20]:
# Combine the NCSB configurations into a single "NCSB-best" column
# (presumably the per-formula best result -- confirm in ltlcross_wrapper);
# the name is used by the scatter plot in the next cell.
b.compute_best(tool_set=ncsb_tools,new_col_name="NCSB-best")

In [21]:
# Scatter plot comparing yes.goal#fri with the "NCSB-best" column computed above.
b.bokeh_scatter_plot(frib, "NCSB-best")