### RQ1. Effectiveness (Range of Results)

In [29]:
import os
import pandas as pd
import numpy as np

#### SBFL results

In [30]:
sblf_result_file = "results/purely_sbfl/ranks.pkl"
sbfl_df = pd.read_pickle(sblf_result_file)
sbfl_formulas = sbfl_df.columns.values

In [31]:
for sbfl_formula in sbfl_formulas:
    best_ranks = sbfl_df[sbfl_formula].apply(np.min).values
    best_ranks = list(set(best_ranks) - set([1570]))
    min_rank = np.min(best_ranks)
    max_rank = np.max(best_ranks)
    #print(best_ranks)
    std_v = np.round(np.std(best_ranks), decimals=2)
    print ("{} (min ~ max, std): {} ~ {} ({:.2f})".format(sbfl_formula, min_rank, max_rank, std_v))

dstar (min ~ max, std): 1 ~ 751 (146.25)
ochiai (min ~ max, std): 1 ~ 748 (153.91)
tarantula (min ~ max, std): 1 ~ 259 (75.78)
barinel (min ~ max, std): 1 ~ 243 (74.98)


#### SBFL scores (suspiciousness) as GP features 

In [32]:
from main_learn2rank import select_mdl

sbfl_gp_result_dir = "results/gp/sbfl"
fold_file = "data/data_folds.tsv"

data_folds_df = pd.read_csv(fold_file, delimiter='\t')
data_folds = {int(test_fold_idx):commits.split(",") for test_fold_idx,commits in data_folds_df.values}

In [33]:
def get_mdl_indices(selected_mdls, indices_to_fold, which = 'med'):
    """
    """
    indices_of_models = {}
    for idx in indices_to_fold:
        indices_of_models[idx] = selected_mdls.loc[
            (selected_mdls.fold == idx) & (selected_mdls.which == which)].iter.values[0]
    
    return indices_of_models

def get_ranks_of_specific_mdls(result_dir, data_folds, mdl_indices):
    """
    commits -> target flaky commit (the fix commit of flaky test, more exactly)
    mdl_indices -> for each fold
    """
    import glob
    ranks_per_commit = {}
    for fold_idx, commits_in_fold in data_folds.items():
        mdl_idx = mdl_indices[fold_idx]
        for commit in commits_in_fold:
            result_file_pat = os.path.join(result_dir, "{}.{}.test.result.csv".format(commit, mdl_idx))
            result_files = glob.glob(result_file_pat)
            assert len(result_files) == 1, result_file_pat
            result_file = result_files[0]
            df = pd.read_csv(result_file)
            ranks = df.loc[df.isSusFlaky == 1].sp_rank.values
            ranks_per_commit[commit[:8]] = ranks
            
    return ranks_per_commit
    

In [34]:
selected_mdls = select_mdl(None, 'gp', sbfl_gp_result_dir, "data", 30, data_folds)
med_mdl_indices = get_mdl_indices(selected_mdls, np.arange(10), which = 'med')
med_mdl_ranks = get_ranks_of_specific_mdls(sbfl_gp_result_dir, data_folds, med_mdl_indices)
med_mdl_best_ranks = [np.min(rs) for rs in med_mdl_ranks.values()]
#best_mdl_indices = get_mdl_indices(selected_mdls, np.arange(10), which = 'best')
#best_ranks = get_ranks_of_specific_mdls(sbfl_gp_result_dir, data_folds, best_mdl_indices)

In [35]:
print ("For GP with only SBFL scores as it input features, \n  ranks: {} ~ {} and std of ranks: {:.2f}"
    .format(np.min(med_mdl_best_ranks), np.max(med_mdl_best_ranks), np.round(np.std(med_mdl_best_ranks), decimals=2)))

For GP with only SBFL scores as it input features, 
  ranks: 1 ~ 1570 and std of ranks: 274.34
