In [1]:
"""
This notebook aggregates all output from toboggan and catfish
for a specified input dataset, then creates runtime histograms
"""
import table_generator
import matplotlib.pyplot as plt
import numpy as np

    
show_flag = False

time_totals = []
time_totals_catfish_nontrivials = []
time_totals_catfish_tob_timeouts = []

froots = ['human', 'mouse', 'zebra']

for froot in ['human', 'mouse', 'zebra']:
    #froot = 'mouse'

    timeoutexceed = 800
    which_alg = "toboggan"

    inputfile = "all-" + froot + ".txt"

    # import table_generator

    datadict, datamatrix, dict_cat_to_tob, dict_tob_to_cat = table_generator.make_tables(inputfile)

    info_dict = table_generator.get_toboggan_timing_info(datadict, datamatrix)
    num_trivial = info_dict['num_trivial']
    num_timedout = info_dict['num_timedout']
    time_totals.append(info_dict['time_totals'])
    total_num = info_dict['total_num']
    nontrivials_dict = info_dict['nontrivials_dict']
    toboggan_completed = info_dict['toboggan_completed']
    toboggan_timeouts = info_dict['toboggan_timeouts']
    toboggan_num_paths_dict = info_dict['toboggan_num_paths_dict']

    table_generator.print_data_summary( froot, len(datadict), num_trivial )
    table_generator.print_alg_summary( which_alg, timeoutexceed, num_timedout, len(time_totals) )

    print(num_timedout + len(time_totals))

    catinputfile = 'catfish-' + froot + '-output.txt'
    catfish_dict, catfish_matrix = table_generator.get_catfish_tables( '../catfish-comparison/', catinputfile)

    # interesect catfish keys with nontrivial keys
    nontrivial_catfish = catfish_dict.copy()
    for key in catfish_dict:
        key_tob = dict_cat_to_tob[key]
        if key_tob not in toboggan_completed:
            nontrivial_catfish.pop(key, None)
    catfish_nontrivials_info_dict = table_generator.get_catfish_timing_info(nontrivial_catfish, catfish_matrix)
    time_totals_catfish_nontrivials.append(catfish_nontrivials_info_dict['time_totals'])

    catfish_on_tob_timeouts = {}
    for key, val in catfish_dict.items():
        key_tob = dict_cat_to_tob[key]
        if key_tob in toboggan_timeouts:
            catfish_on_tob_timeouts[key_tob] = val
    catfish_tob_timeout_info_dict = table_generator.get_catfish_timing_info(catfish_on_tob_timeouts, catfish_matrix)
    time_totals_catfish_tob_timeouts.append(catfish_tob_timeout_info_dict['time_totals'])

human has 
	total   instances:      1169083
	trivial instances:       639560
	nontriv instances:       529523

toboggan with 800s timeout has
	timeout instances:         2478
	success instances:            1
2479
mouse has 
	total   instances:      1316058
	trivial instances:       842873
	nontriv instances:       473185

toboggan with 800s timeout has
	timeout instances:         2487
	success instances:            2
2489
zebra has 
	total   instances:      1549373
	trivial instances:      1103493
	nontriv instances:       445880

toboggan with 800s timeout has
	timeout instances:          178
	success instances:            3
181


In [2]:
catfish_sum = 0
toboggan_sum = 0
toboggan_long_sum = 0
for j in [0,1,2]:
    print(sum(time_totals_catfish_nontrivials[j]))
    catfish_sum += sum(time_totals_catfish_nontrivials[j])
    toboggan_sum += sum(time_totals[j])
    long_times = [ x for x in time_totals[j] if x > 0.5]
    toboggan_long_sum += sum(long_times)
    print(len(long_times))
print("catfish total: {}".format(catfish_sum))
print("toboggan total: {}".format(toboggan_sum))
print("toboggan total over 2s: {}".format(toboggan_long_sum))
print("toboggan minus long {}".format(toboggan_sum - toboggan_long_sum))



1806.6503698825836
4791
1659.3556747436523
4083
1625.6518473625183
761
catfish total: 5091.657891988754
toboggan total: 143034.45211879964
toboggan total over 2s: 137506.93341922518
toboggan minus long 5527.518699574459


In [11]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

froots = ['human', 'mouse', 'zebra']
show_flag = False
hist_flag = True
kde_flag = False
norm_hist_flag=True

for j in [0,1,2]:

    froot = froots[j]

    plt.clf()
    plt.figure(figsize=(4,3))

    axis_font = {'fontname':'DejaVu Sans', 'size':'13'}
    title_font = {'fontname':'DejaVu Sans', 'size':'13'}
    xtick_font = {'fontname':'DejaVu Sans', 'size':'11'}
    ytick_font = {'fontname':'DejaVu Sans', 'size':'12'}

    data=np.log10(time_totals[j])
    sns.distplot(data, kde=kde_flag, hist=hist_flag, norm_hist=norm_hist_flag, label="toboggan")
    data=np.log10(time_totals_catfish_nontrivials[j])
    sns.distplot(data, kde=kde_flag, hist=hist_flag, norm_hist=norm_hist_flag, label="catfish")
    # data=np.log10(time_totals_catfish_tob_timeouts[j])
    # sns.distplot(data, kde=True, hist=False, label="TOs", ax=ax)

    plt.title(froots[j], **title_font)
    plt.legend(loc='upper right', fontsize=axis_font['size'])
    plt.gca().set_xticklabels([10**x for x in range(-4, 4)])
    plt.xlabel("Runtime (seconds)", **axis_font)
    plt.ylabel("Frequency", **axis_font)

    fig = plt.gcf()
    fig.tight_layout()

    if show_flag == True:
        plt.show()
    else:
        plt.savefig( './figures/'+'runtime-kdensity-' + froot + '.pdf', format='pdf', dpi=300)
        plt.savefig( './figures/'+'runtime-kdensity-' + froot + '.png', format='png', dpi=300)
    plt.close()


In [16]:
slow_insts = 0
total_insts = 0
for j in [0,1,2]:
    temp = [ x for x in time_totals[j] if x > 0.1 ]
    slow_insts += len(temp)
    total_insts += len(time_totals[j])

print( slow_insts / total_insts )

0.013047951255503327


In [4]:
# import matplotlib.pyplot as plt
# import numpy as np
# import seaborn as sns

# froots = ['human', 'mouse', 'zebra']
# show_flag = True
# hist_flag = False
# log_scale_flag = True
# j = 0

# froot = froots[j]
# fig, ax = plt.subplots()

# plt.clf()
# plt.figure(figsize=(4,3))

# axis_font = {'fontname':'DejaVu Sans', 'size':'13'}
# title_font = {'fontname':'DejaVu Sans', 'size':'13'}
# xtick_font = {'fontname':'DejaVu Sans', 'size':'11'}
# ytick_font = {'fontname':'DejaVu Sans', 'size':'12'}


# binspace = np.linspace(-4.0, 4.0, num=10)
# logbins = [ 10**x for x in binspace ]
# print(logbins)

# n, bins, patchs = plt.hist(np.array(time_totals[j]).reshape( (len(time_totals[j]), 1) ), bins=logbins, log=log_scale_flag, alpha=0.5, label='toboggan')
# #n, bins, patchs = plt.hist(time_totals_catfish_nontrivials[j], bins=logbins, log=log_scale_flag, alpha=0.5, label='catfish')
# #n, bins, patchs = plt.hist(time_totals_catfish_tob_timeouts[j], bins=logbins, log=log_scale_flag, alpha=0.5, label='TOs', color='r')
    
    
# # data=np.log10(time_totals[j])
# # sns.distplot(data, kde=True, hist=hist_flag, label="toboggan")
# # data=np.log10(time_totals_catfish_nontrivials[j])
# # sns.distplot(data, kde=True, hist=hist_flag, label="catfish")
# # data=np.log10(time_totals_catfish_tob_timeouts[j])
# # sns.distplot(data, kde=True, hist=False, label="TOs", ax=ax)

# plt.title(froots[j], **title_font)
# plt.legend(loc='upper right', fontsize=axis_font['size'])
# plt.gca().set_xticklabels([10**x for x in range(-4, 4)])
# plt.xlabel("Runtime (seconds)", **axis_font)
# #plt.ylabel("Density", **axis_font)
# plt.xlim( [np.log10(min(logbins)), np.log10(max(logbins))] )
# plt.ylim( [1, 100000] )

# fig = plt.gcf()
# fig.tight_layout()

# if show_flag == True:
#     plt.show()
# else:
#     plt.savefig( './figures/'+'runtime-kdensity-' + froot + '.eps', format='eps', dpi=300)
#     plt.savefig( './figures/'+'runtime-kdensity-' + froot + '.png', format='png', dpi=300)
# plt.close()


