In [1]:
"""
This notebook aggregates all output from toboggan and catfish
for each input dataset (human, mouse, zebra), then creates runtime histograms.
"""
import os

import matplotlib.pyplot as plt
import numpy as np

import table_generator

    
# When true, each figure is shown interactively; otherwise it is saved to
# disk as both EPS and PNG.
show_flag = False

# Settings shared by every dataset (hoisted out of the loop: none of them
# depends on the dataset being processed).
timeoutexceed = 800
which_alg = "toboggan"
figure_dir = './figures/'

# Histogram bin edges: one bin per decade from 1e-4 to 1e4.
logbins = [10**-4, 10**-3, 10**-2, 10**-1, 1, 10, 100, 1000, 10000]
log_scale_flag = True
# Tag inserted into the output filename when the y axis is not log-scaled.
extratag = '' if log_scale_flag else 'no-log-'

axis_font = {'fontname': 'DejaVu Sans', 'size': '14'}
title_font = {'fontname': 'DejaVu Sans', 'size': '14'}
xtick_font = {'fontname': 'DejaVu Sans', 'size': '11'}
ytick_font = {'fontname': 'DejaVu Sans', 'size': '12'}

for froot in ['human', 'mouse', 'zebra']:
    inputfile = "all-" + froot + ".txt"

    datadict, datamatrix, dict_cat_to_tob, dict_tob_to_cat = table_generator.make_tables(inputfile)

    # Unpack the toboggan timing summary.  Some of these names are not read
    # again in this cell; they are kept because later notebook cells may
    # rely on them being defined -- TODO confirm and prune.
    info_dict = table_generator.get_toboggan_timing_info(datadict, datamatrix)
    num_trivial = info_dict['num_trivial']
    num_timedout = info_dict['num_timedout']
    time_totals = info_dict['time_totals']
    total_num = info_dict['total_num']
    nontrivials_dict = info_dict['nontrivials_dict']
    toboggan_completed = info_dict['toboggan_completed']
    toboggan_timeouts = info_dict['toboggan_timeouts']
    toboggan_num_paths_dict = info_dict['toboggan_num_paths_dict']

    table_generator.print_data_summary(froot, len(datadict), num_trivial)
    table_generator.print_alg_summary(which_alg, timeoutexceed, num_timedout, len(time_totals))

    # Timed-out plus completed instance count, printed below the summary.
    print(num_timedout + len(time_totals))

    catinputfile = 'catfish-' + froot + '-output.txt'
    catfish_dict, catfish_matrix = table_generator.get_catfish_tables('../catfish-comparison/', catinputfile)

    # Intersect catfish keys with the instances toboggan completed: keep a
    # catfish entry only if its toboggan key is in toboggan_completed.
    nontrivial_catfish = {
        key: val
        for key, val in catfish_dict.items()
        if dict_cat_to_tob[key] in toboggan_completed
    }
    catfish_nontrivials_info_dict = table_generator.get_catfish_timing_info(nontrivial_catfish, catfish_matrix)
    time_totals_catfish_nontrivials = catfish_nontrivials_info_dict['time_totals']

    # Catfish results for the instances on which toboggan timed out.
    # NOTE(review): this dict is keyed by the *toboggan* key, whereas
    # nontrivial_catfish above is keyed by the catfish key -- confirm that
    # get_catfish_timing_info does not depend on the key space.
    catfish_on_tob_timeouts = {}
    for key, val in catfish_dict.items():
        key_tob = dict_cat_to_tob[key]
        if key_tob in toboggan_timeouts:
            catfish_on_tob_timeouts[key_tob] = val
    catfish_tob_timeout_info_dict = table_generator.get_catfish_timing_info(catfish_on_tob_timeouts, catfish_matrix)
    time_totals_catfish_tob_timeouts = catfish_tob_timeout_info_dict['time_totals']

    # Create exactly one figure per dataset and keep a handle to it so that
    # the same figure is the one closed at the end of the iteration.
    fig = plt.figure(figsize=(4, 3))

    plt.hist(time_totals, bins=logbins, log=log_scale_flag, alpha=0.5, label='toboggan')
    plt.hist(time_totals_catfish_nontrivials, bins=logbins, log=log_scale_flag, alpha=0.5, label='catfish')
    plt.hist(time_totals_catfish_tob_timeouts, bins=logbins, log=log_scale_flag, alpha=0.5, label='timeouts', color='r')

    # Only the 'human' panel carries the y-axis label, y tick labels and the
    # legend; the other panels have their y tick labels blanked.
    # NOTE(review): the scale is set *after* the tick labels here and for the
    # x axis below; in current Matplotlib setting a scale reinstalls the
    # default formatter, so the explicit label lists may only end up
    # contributing their font settings -- confirm with the Matplotlib
    # version in use before reordering.
    if froot == 'human':
        plt.ylabel("Frequency", **axis_font)
        plt.gca().set_yticklabels([10**x for x in range(0, 6)], **ytick_font)
        plt.gca().set_yscale("log")
        plt.legend(loc='upper right', frameon=False)
    else:
        plt.gca().set_yticklabels([])

    plt.title("dataset: {}".format(froot), **title_font)
    plt.xlabel("time", **axis_font)
    plt.xlim([10**-4, 10**4])
    plt.ylim([10**0, 10**6])
    plt.gca().set_xticklabels([10**x for x in range(-4, 5)], **xtick_font)
    plt.gca().set_xscale("log")

    fig.tight_layout()

    if show_flag:
        plt.show()
    else:
        # Make sure the output directory exists so savefig does not fail
        # on a fresh checkout.
        os.makedirs(figure_dir, exist_ok=True)
        out_stem = figure_dir + 'runtime-toboggan-' + extratag + froot
        plt.savefig(out_stem + '.eps', format='eps', dpi=300)
        plt.savefig(out_stem + '.png', format='png', dpi=300)
    plt.close(fig)

human has 
	total   instances:      1169083
	trivial instances:       639560
	nontriv instances:       529523

toboggan with 800s timeout has
	timeout instances:         2478
	success instances:       527045
529523
mouse has 
	total   instances:      1316058
	trivial instances:       842873
	nontriv instances:       473185

toboggan with 800s timeout has
	timeout instances:         2487
	success instances:       470698
473185
zebra has 
	total   instances:      1549373
	trivial instances:      1103493
	nontriv instances:       445880

toboggan with 800s timeout has
	timeout instances:          178
	success instances:       445702
445880
