In [2]:
"""
Look at runtimes on instances where Catfish fails
i.e. instances where k_catfish > k_opt
"""

import matplotlib.pyplot as plt
import numpy as np
import collections
import table_generator

import scipy.stats as scistat

# Datasets to process (zebra, mouse, human); edit the list below to run only a subset.
# For each dataset, collect the instances where Catfish reports more paths
# than Toboggan, print a summary of the gaps, and save a histogram of both
# tools' values on those instances.
for froot in ('zebra', 'mouse', 'human'):
    timeoutexceed = 800
    which_alg = "toboggan"

    inputfile = f"all-{froot}.txt"

    # [1] Load the dataset tables and the Toboggan summary built from them.
    (datadict, datamatrix,
     dict_cat_to_tob, dict_tob_to_cat) = table_generator.make_tables(inputfile)
    info_dict = table_generator.get_toboggan_timing_info(datadict, datamatrix)
    nontrivials_dict = info_dict['nontrivials_dict']
    toboggan_completed = info_dict['toboggan_completed']
    toboggan_timeouts = info_dict['toboggan_timeouts']
    toboggan_num_paths_dict = info_dict['toboggan_num_paths_dict']

    catinputfile = f'catfish-{froot}-output.txt'
    catfish_dict, catfish_matrix = table_generator.get_catfish_tables(
        '../catfish-comparison/', catinputfile)

    # [2] Find the instances where Catfish uses more paths than Toboggan
    # (k_catfish > k_opt). Only the third return value is needed here.
    catfish_numpaths = table_generator.get_catfish_timing_info(
        catfish_dict, catfish_matrix)[2]

    tob_on_cat_failures = {}
    catfish_fail_times = {}
    gap_info = collections.defaultdict(int)  # path-count gap -> number of instances
    for key, val in catfish_dict.items():
        key_tob = dict_cat_to_tob[key]
        tob_paths_raw = toboggan_num_paths_dict[key_tob]
        # Skip instances for which no Toboggan path count is recorded.
        if tob_paths_raw is None:
            continue
        path_gap = int(catfish_numpaths[key]) - int(tob_paths_raw)
        # Keep only the instances where Catfish used strictly more paths.
        if path_gap <= 0:
            continue
        tob_on_cat_failures[key_tob] = toboggan_completed[key_tob]
        catfish_fail_times[key] = val
        gap_info[path_gap] += 1

    # presumably these are Toboggan runtimes -- verify against table_generator
    toboggan_times = [*tob_on_cat_failures.values()]
    # Re-run the Catfish summary on the failing subset only and keep its first
    # return value (presumably the runtimes -- verify against table_generator).
    catfish_fail_times = table_generator.get_catfish_timing_info(
        catfish_fail_times, catfish_matrix)[0]

    print(froot)
    print(gap_info)
    print(f"total instances that catfish fails: {sum(gap_info.values())}")
    print("")

    # [3] Overlay the two histograms on shared, decade-wide bins.
    plt.clf()
    logbins = [10**exponent for exponent in range(-4, 5)]
    log_scale_flag = True
    extratag = '' if log_scale_flag else 'no-log-'

    # NOTE(review): alpha=0.5 may not be honoured in the EPS output -- confirm
    # the saved .eps shows both series as intended.
    for series, series_label in ((toboggan_times, 'toboggan'),
                                 (catfish_fail_times, 'catfish')):
        n, bins, patchs = plt.hist(series, bins=logbins, log=log_scale_flag,
                                   alpha=0.5, label=series_label)

    plt.legend(loc='upper right', frameon=False)
    plt.title(f"Runtime on Catfish failures: {froot}")
    plt.xlabel("time")
    plt.ylabel("Frequency")
    plt.gca().set_xscale("log")

    # Write the figure to disk in both formats instead of showing it.
    for ext in ('eps', 'png'):
        plt.savefig(f'runtime-catfish-failures-{extratag}{froot}.{ext}',
                    format=ext, dpi=300)
    plt.close()

zebra
defaultdict(<class 'int'>, {1: 79, 2: 5})
total instances that catfish fails: 84

mouse
defaultdict(<class 'int'>, {1: 290, 2: 89, 3: 60, 10: 8, 5: 24, 4: 36, 13: 4, 15: 1, 7: 12, 9: 6, 11: 4, 8: 11, 6: 14, 18: 1, 29: 1, 16: 2, 14: 2, 17: 2})
total instances that catfish fails: 567

human
defaultdict(<class 'int'>, {1: 297, 2: 28, 3: 1})
total instances that catfish fails: 326

