In [1]:
# This notebook aggregates all output from toboggan for an input dataset
# and then creates a runtime histogram for that dataset

import collections

import table_generator

# Dataset selector -- set froot to the dataset you want: zebra, human, mouse
froot = 'zebra'

# Name of the aggregated results file for the selected dataset
inputfile = "all-{}.txt".format(froot)

# Load the aggregated results into the two lookup structures returned by
# table_generator.make_tables
datadict, datamatrix = table_generator.make_tables(inputfile)


In [2]:
# Compare k_groundtruth against k_opt and against the two lower bounds

# Counts, keyed by k_groundtruth, of instances where a bound equals k_groundtruth
cutset_kgt = collections.defaultdict(int)
improved_kgt = collections.defaultdict(int)
# Counts, keyed by (improved_bound - cutset_bound), where that gap is positive
cutset_improve = collections.defaultdict(int)
# Counts, keyed by (k_groundtruth - k_opt), where k_groundtruth > k_opt;
# instances with n_red == 1 go to the "trivial" table, the rest to kgt_fails
kgt_fails = collections.defaultdict(int)
trivial_kgt_fails = collections.defaultdict(int)
# One [k_groundtruth, k_opt] pair per instance counted in kgt_fails
kgt_fail_info = []
# Number of instances whose k_opt entry is not the string 'None'
total_success = 0

# Column layout of every datamatrix row:
#   datamatrix[j] = [ n, m, n_red, m_red,
#                     k_groundtruth, k_opt, cutset_bound, improved_bound,
#                     t_w, t_path ]
for key, val in datadict.items():
    row = datamatrix[val]
    kgt, kopt = int(row[4]), row[5]
    cutset_bound, improved_bound = int(row[6]), int(row[7])

    # Lower-bound bookkeeping happens for every instance
    if cutset_bound == kgt:
        cutset_kgt[kgt] += 1
    if improved_bound == kgt:
        improved_kgt[kgt] += 1
    if improved_bound > cutset_bound:
        cutset_improve[improved_bound - cutset_bound] += 1

    # A k_opt of 'None' is not counted as a success and has nothing to compare
    if kopt == 'None':
        continue
    total_success += 1
    kopt = int(kopt)

    # Only instances where k_groundtruth exceeds k_opt are recorded below
    if kgt <= kopt:
        continue
    k_gap = kgt - kopt
    if int(row[2]) == 1:  # if trivial graph, take note
        trivial_kgt_fails[k_gap] += 1
    else:
        kgt_fails[k_gap] += 1
        kgt_fail_info.append([kgt, kopt])


In [3]:
# Summary report: dataset size, cutset / lower-bound facts, and
# k_groundtruth vs k_opt facts

import numpy as np

print("==================================================")
print(froot)
print( "=" * len(froot) )
prestr = "{} has total instances".format(froot)
print( "{:35}  {:>12}".format(prestr, len(datadict)) )

# Facts about cutsets and lowerbounds
# Total number of instances on which each bound equals k_groundtruth
cutset_matches = sum(cutset_kgt.values())
improved_matches = sum(improved_kgt.values())

print("\ncutset / lowerbound facts")
prestr = "k_groundtruth = max_cutset_size on"
print( "\t{:35}  {:>12}".format(prestr, cutset_matches) )
print( "\t{:>35}  {:>12})".format("(all but", len(datadict) - cutset_matches) )
prestr = "k_groundtruth = improved_bound on"
print( "\t{:35}  {:>12}".format(prestr, improved_matches) )
print( "\t{:>35}  {:>12})".format("(all but", len(datadict) - improved_matches) )


# Facts about kgt vs kopt
print("\nk_gt / k_opt facts")
print( "\t{:35}  {:>12}".format("toboggan has finished on", total_success) )

trivial_fails = sum(trivial_kgt_fails.values())
nontrivial_fails = sum(kgt_fails.values())
total_fails = nontrivial_fails + trivial_fails

print( "\t{:35}  {:>12}".format("of those, k_groundtruth > k_opt on:", total_fails) )
print( "\t{:>35}  {:>12}".format("trivial instances", trivial_fails ) )
print( "\t{:>35}  {:>12}".format("nontrivial instances", nontrivial_fails ) )

# How big are k_groundtruth and k_optimal on these instances?
# kgt_fail_info holds one [k_gt, k_opt] pair per nontrivial failing instance,
# so averaging over axis 0 yields [mean k_gt, mean k_opt].
if kgt_fail_info:
    averages = np.average(kgt_fail_info, axis=0)
else:
    # With no failing instances np.average returns a scalar NaN (and warns),
    # which cannot be indexed; report NaN for both columns instead.
    averages = np.full(2, np.nan)
print( "\t{:35}  {:12.2f}".format("average k_gt", averages[0] ) )
print( "\t{:35}  {:12.2f}".format("average k_opt", averages[1] ) )

zebra
=====
zebra has total instances                 1549373

cutset / lowerbound facts
	k_groundtruth = max_cutset_size on        1540685
	                           (all but          8688)
	k_groundtruth = improved_bound on         1546699
	                           (all but          2674)

k_gt / k_opt facts
	toboggan has finished on                  1549195
	of those, k_groundtruth > k_opt on:           455
	                  trivial instances            53
	               nontrivial instances           402
	average k_gt                                 5.21
	average k_opt                                4.13
