In [None]:
import os 
import pandas as pd 

import matplotlib.pylab as plt
import seaborn as sns
import numpy as np

from matplotlib.backends.backend_pdf import PdfPages

In [None]:
#globals
DATA_PATH = '../results/stats_results'

ALGOS = ['RF', 'DT', 'KNN','FFT']
SCORERS = ['MCC','Gmean', 'F1', 'F1_W',  'ACC', 'BAL_ACC',  'AUC', 'LogLoss']
STUDIED_RELEASES = {   
    'ActiveMQ': [
        "activemq-5.0.0", 'activemq-5.1.0', "activemq-5.2.0",'activemq-5.3.0', 'activemq-5.8.0'
        ],
    'Derby': [
        'derby-10.2.1.6', 'derby-10.3.1.4', 'derby-10.5.1.1'
    ],
    'Groovy': [
        'groovy-1_5_7', 'groovy-1_6_BETA_1', 'groovy-1_6_BETA_2'
    ],
    'Hbase': [
         'hbase-0.94.0', 'hbase-0.95.0', 'hbase-0.95.2'
    ],
    'Hive': [
        'hive-0.9.0', 'hive-0.10.0', 'hive-0.12.0'
    ],
    'Jruby':[
        'jruby-1.1', 'jruby-1.4.0', 'jruby-1.5.0', 'jruby-1.7.0'
    ],
    'Lucene': [
        'lucene-2.3.0','lucene-2.9.0', 'lucene-3.0.0', 'lucene-3.1.0'
    ],
    'Wicket': [
        'wicket-1.3.0-incubating-beta-1', 'wicket-1.3.0-beta2', 'wicket-1.5.3'
    ]
}
ALL_STUDIED_RELEASES = [
     "activemq-5.0.0", 'activemq-5.1.0', "activemq-5.2.0",'activemq-5.3.0',
     'derby-10.2.1.6', 'derby-10.3.1.4', 'groovy-1_5_7', 'groovy-1_6_BETA_1',
     'hbase-0.94.0', 'hbase-0.95.0',  'hive-0.9.0', 'hive-0.10.0', 
     'jruby-1.1', 'jruby-1.4.0', 'jruby-1.5.0', 'lucene-2.3.0','lucene-2.9.0',
    'lucene-3.0.0','wicket-1.3.0-incubating-beta-1', 'wicket-1.3.0-beta2'
]
DATA = {
    
    'MCC': {
        'data': pd.concat([pd.read_csv(os.path.join(DATA_PATH, 'MCC_results.csv')), pd.read_csv(os.path.join(DATA_PATH, 'MCC_FFT_results.csv'))]),
        'round_digits' :3
    },
    'G': {
        'data': pd.concat([pd.read_csv(os.path.join(DATA_PATH, 'G_results.csv')), pd.read_csv(os.path.join(DATA_PATH, 'G_FFT_results.csv'))]),
        'round_digits' :3
    },
    'F1': {
        'data': pd.concat([pd.read_csv(os.path.join(DATA_PATH, 'F1_results.csv')), pd.read_csv(os.path.join(DATA_PATH, 'G_FFT_results.csv'))]),
        'round_digits' :3
    },
    'AUC': {
        'data': pd.read_csv(os.path.join(DATA_PATH, 'AUC_results.csv')),
        'round_digits' :3
    }
}

In [32]:
#helpers
def RQ1_tables(metrics_dfs,orgs = STUDIED_RELEASES,
               algos = ALGOS,
               scorers = SCORERS
               ) : 
    
    odd = 1
    for org, releases in orgs.items(): 
        org_lines = []
        #print('org:',org,'=====================================================')
        print(f'\\multirow{{{len(algos)*(len(releases) - 1)}}}{{2cm}}{{{org}}}')
        for release_index, release in enumerate(releases[:-1]):
            print(f'&\\multirow{{{len(algos)}}}{{2cm}}{{{extract_release(release)}}}&\\multirow{{{len(algos)}}}{{2cm}}{{{extract_release(releases[release_index + 1])}}}')
            
            for index,algo in enumerate(algos): 
                odd =1-odd
                if index == 0 :
                    line = "&"
                else: 
                    line = "&&&"
                if odd == 1: 
                    line +="\\rowcolor[HTML]{DADADA}"
                line +=algo+'&'
                for index,(metric,metric_data) in enumerate(metrics_dfs.items()) :
                    for scorer_index, scorer in enumerate(scorers):
                        model_metric = select_algo_performance(metric_data['data'],release,algo, scorer=scorer) 
                        item = '-'
                        if not (model_metric is None):

                            item = f'{round(model_metric["mean"],metric_data["round_digits"])} ({model_metric["group"]})'
                            if model_metric['group'] == 1 : 
                                item = f'\\textbf{{{item}}}'
                                
                        if index == len(metrics_dfs)-1 and  scorer_index == len(scorers) - 1: 
                            item += "\\\\"
                        else :
                            item+= "&"
                        line +=item
                org_lines.append(line)
                print(line)
            print('\\cline{2-36}')
        print('\\hline')

def compute_improvements(data, studied_scorer, studied_files=ALL_STUDIED_RELEASES, studied_algos = ALGOS): 
    res = {}
    for algo in  studied_algos: 
        res[algo] =  []
        algo_data = data[data['model'] == algo]
        for file in studied_files: 
            file_data = algo_data[algo_data['file'] == file]
            best_performance = file_data[file_data['group'] == 1]
            best_file_data_scorers = file_data[file_data['group'] == 1]['scorer'].unique()
            if studied_scorer in best_file_data_scorers: 
                continue 

            else : 
                best_score = best_performance.to_dict(orient='records')[0]['mean']
                best_scorer =  best_performance.to_dict(orient='records')[0]['scorer']
                scorer_score = file_data[file_data['scorer'] == studied_scorer].to_dict(orient='records')[0]['mean']
                if scorer_score <= 0 : 
                    best_score += abs(scorer_score) + 2
                    scorer_score = 2
                res[algo].append(round((best_score - scorer_score)*100/scorer_score, 2))
    
    return res 

            

def plot_metric_corrolation(dfs, files, scorer, algo, ax= None, cbar = True): 
    corrolation_map = metric_corrolation(dfs, files, scorer, algo)
    print(corrolation_map)
    sns.heatmap(corrolation_map, annot=True, ax=ax,  vmin=-1, vmax = 1, cbar = cbar)
    
def metric_corrolation(dfs, files, scorer, algo): 
    metric_ranks_df = performance_metric_corrolation(dfs, files, scorer, algo)
    correlation_matrix = metric_ranks_df.corr(method='spearman')
    return correlation_matrix

def performance_metric_corrolation(dfs, files, scorer, algo) : 
    metric_ranks = {}
    for metric, metric_data in dfs.items(): 
        print(metric_data)
        selected_data = metric_data[(metric_data['file'].isin(files)) & (metric_data['scorer'] == scorer) & (metric_data['model'] == algo)]
        selected_data = selected_data.sort_values(by = ["file"])
        print(selected_data)
        metric_ranks[metric] = selected_data['group'].values
    return pd.DataFrame(metric_ranks)

def scatter_ranks_plot(df, scorerX, scorerY, algo='RF', ax=None):
    df_cp = df[(df['scorer'].isin([scorerX, scorerY]))&(df['model'] == algo)]
    df_cp = df_cp.sort_values(by=['file'])
    scorerX_ranks = df_cp[df_cp['scorer'] == scorerX]['group'].astype(int).values
    scorerY_ranks = df_cp[df_cp['scorer'] == scorerY]['group'].astype(int).values
    #fig, my_ax=plt.subplots()
    df_count = pd.DataFrame({
        scorerX + " tuning metric ranks": scorerX_ranks,
        scorerY + " tuning metric ranks": scorerY_ranks
    })
    df_count= df_count.groupby([scorerX + " tuning metric ranks", scorerY + " tuning metric ranks"]).size().reset_index(name='Count')
    print(df_count)
    sns.scatterplot(data=df_count, x=scorerX + " tuning metric ranks", y=scorerY + " tuning metric ranks", size= "Count", ax=ax)


def plot_per_metric(metric, df, studied_files=ALL_STUDIED_RELEASES, studied_algos = ALGOS, studied_scorers=SCORERS):
    fig, axs = plt.subplots(nrows = 1, ncols = len(studied_algos),  figsize = (25, 5))
    for algo_index, algo in enumerate(studied_algos): 
        cbar= False
        if algo_index == len(studied_algos) - 1:
            cbar= True
        if algo == "FFT":
            plot_corrolation(df, studied_files, [algo], ['MCC', 'F1', 'F1_W', 'ACC', 'Gmean', 'ACC', 'BAL_ACC'], axs[algo_index], vmin=-1, vmax=1, cbar = cbar)    
        else:
            plot_corrolation(df, studied_files, [algo], studied_scorers, axs[algo_index], vmin=-1, vmax=1, cbar = cbar)
        axs[algo_index].set_title(algo)
    
    return fig


def plot_corrolation(df, studied_files=ALL_STUDIED_RELEASES, studied_algos = ALGOS, studied_scorers=SCORERS, ax=None, vmin=0, vmax = 16, cbar =False): 
    corr_matrix = comput_corrolation(df, studied_files, studied_algos, studied_scorers)
    mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
    sns.heatmap(corr_matrix, annot=True, ax=ax,  vmin=vmin, vmax = vmax, cbar =cbar)

    
def comput_corrolation(df, studied_files=ALL_STUDIED_RELEASES, studied_algos = ALGOS, studied_scorers=SCORERS): 
    selected_df = df[(df["file"].isin(studied_files)) & df['model'].isin(studied_algos)]
    selected_df = selected_df.sort_values(by = ['file', 'model'])
    corr_df = {}
    for scorer in studied_scorers: 
        corr_df[scorer] = selected_df[selected_df['scorer'] == scorer]['group'].values
    print(corr_df)
    corr_df = pd.DataFrame(corr_df)
    correlation_matrix = corr_df.corr(method='spearman')
    return correlation_matrix




def plot_metrics_scorers_exclusive_heatmaps(dfs, algos, scorers, ax, vmin=0, vmax = 16, cbar =True): 
    heatmap_df = not_exclusive_better_metric(dfs, algos=algos, scorers=scorers).pivot(index="Metric", columns="Scorer", values="Count")
    heatmap_df.index = pd.CategoricalIndex(heatmap_df.index, categories= scorers)
    heatmap_df.sort_index(level=0, inplace=True)
    #heatmap_df.sort_index(level=0, ascending=True, inplace=True)
    
    sns.heatmap(heatmap_df, annot=True, ax=ax, vmin = vmin, vmax=vmax, cbar=cbar)
    ax.set_xlabel("Tuning metric")
    ax.set_ylabel("Performance metric")

def not_exclusive_better_metric(dfs, scorers, algos): 
    heatmap_dict = {metric : {scorer : 0 for scorer in scorers} for metric in dfs}
    for metric, df in dfs.items(): 
        selected_df = df[(df['model'].isin(algos)) & (df['scorer'].isin(scorers))]
        files = selected_df['file'].unique()
        for file in files: 
            file_data = selected_df[selected_df['file'] == file]
            for row_idx, row in file_data.iterrows(): 
                if row['group'] == 1:
                    heatmap_dict[metric][row['scorer'] ] += 1 
          
    heatmap_df = []
    for metric in dfs:
        for scorer in scorers:
            heatmap_df.append({
                'Metric': metric, 
                'Scorer': scorer,
                'Count': heatmap_dict[metric][scorer]
            })
    
    return pd.DataFrame(heatmap_df)

def exclusive_better_metric(dfs, scorers, algos): 
    heatmap_dict = {metric : {scorer : 0 for scorer in scorers} for metric in dfs}
    for metric, df in dfs.items(): 
        selected_df = df[(df['model'].isin(algos)) & (df['scorer'].isin(scorers))]
        files = selected_df['file'].unique()
        for file in files: 
            file_data = selected_df[selected_df['file'] == file]
            best_scorers = []
            scorer_is_metric = False
            for row_idx, row in file_data.iterrows(): 
                if row['group'] == 1:
                    if row['scorer'] ==  metric:
                        scorer_is_metric=True 
                        break
                    else: 
                        best_scorers.append(row['scorer'])
                
            if scorer_is_metric: 
                heatmap_dict[metric][metric] += 1 
            else :
                for best_scorer in best_scorers: 
                    heatmap_dict[metric][best_scorer] += 1 
    
    heatmap_df = []
    for metric in dfs:
        for scorer in scorers:
            heatmap_df.append({
                'Metric': metric, 
                'Scorer': scorer,
                'Count': heatmap_dict[metric][scorer]
            })
    
    return pd.DataFrame(heatmap_df)
    
def plot_metrics_scorers_heatmaps(dfs, algos, scorers): 
    heatmap_df = compute_metrics_scorers_heatmap(dfs, models=algos, scorers=scorers).pivot(index="Scorer", columns="Metric", values="Count")
    heatmap_df.sort_index(level=0, ascending=True, inplace=True)

    print(heatmap_df)
    sns.heatmap(heatmap_df, annot=True)

def compute_metrics_scorers_heatmap(dfs, scorers=SCORERS, models = ALGOS): 
    heatmap_dict = {metric : {scorer : 0 for scorer in scorers} for metric in dfs}
    for metric, df in dfs.items(): 
        selected_df = df[df['model'].isin(models)]
        selected_df = selected_df[selected_df['scorer'].isin(scorers)]
        print(selected_df)
        for row_ind, row in selected_df.iterrows(): 
            if row['group']==1: 
                 heatmap_dict[metric][row['scorer']] +=1
    
    heatmap_df = []
    for metric in dfs:
        for scorer in scorers:
            heatmap_df.append({
                'Metric': metric, 
                'Scorer': scorer,
                'Count': heatmap_dict[metric][scorer]
            })
    
    return pd.DataFrame(heatmap_df)
    


def plot_algo_scorers_heatmap(data, algos, scorers): 
    heatmap_df = compute_algo_scorers_heatmap(data, algos, scorers).pivot(index="Scorer", columns="Model", values="Count")
    sns.heatmap(heatmap_df, annot=True)

def select_algo_performance(df,file,algo, scorer) : 
  
    selected_row=df[(df['file'] == file) & (df['model'] == algo) & (df['scorer'] == scorer)].to_dict(orient='records')
    if len(selected_row) == 0:
        return None 
    return selected_row[0]

def extract_release(release_name):
    if release_name == 'wicket-1.3.0-incubating-beta-1': 
        return '1.3.B1'

    if  release_name ==  'wicket-1.3.0-beta2':
        return '1.3.B2'
    
    project_name, release =  release_name.split('-')
    return release.replace('_','.').replace('BETA', 'B').replace('B.1', 'B1').replace('B.2','B2')

def compute_algo_scorers_heatmap(data, algos, scorers): 
    heatmap = {algo:{scorer: None for scorer in scorers} for algo in algos}
    for row_index, row in data.iterrows():
        algo = row['model']
        scorer = row['scorer']
        if heatmap[algo][scorer] == None:
            heatmap[algo][scorer] = 0
        if row['group'] == 1:
            heatmap[algo][scorer] += 1
    
    heatmap_df = []
    for algo in algos:
        for scorer in scorers: 
            heatmap_df.append({
                "Model": algo, 
                "Scorer": scorer,
                "Count": heatmap[algo][scorer]
            })
    return pd.DataFrame(heatmap_df)

def label_metadata(dfs,studied_files = STUDIED_RELEASES, studied_models=ALGOS, studied_scorers=SCORERS): 
    labels = []
    selected_dfs = {metric: df[(df['model'].isin(studied_models)) & (df['scorer'].isin(studied_models))] for metric, df in dfs.items() }
    for file in studied_files:
        pass



In [33]:
metrics_improvements = {
    'MCC': compute_improvements(DATA['MCC']['data'], 'MCC'),
    'G' : compute_improvements(DATA['G']['data'], 'Gmean'),
    'F1' :  compute_improvements(DATA['F1']['data'], 'F1'),
    'AUC' :  compute_improvements(DATA['AUC']['data'], 'AUC', studied_algos=['RF', 'DT', 'KNN']),
    }

In [31]:
metrics_improvements

{'MCC': {'RF': [(25.0, 'AUC', 'activemq-5.0.0'),
   (18.75, 'BAL_ACC', 'activemq-5.2.0'),
   (4.35, 'LogLoss', 'activemq-5.3.0'),
   (12.2, 'LogLoss', 'derby-10.2.1.6'),
   (5.0, 'AUC', 'derby-10.3.1.4'),
   (13.79, 'F1', 'groovy-1_5_7'),
   (15.62, 'AUC', 'hbase-0.94.0'),
   (10.0, 'BAL_ACC', 'hbase-0.95.0'),
   (3.7, 'LogLoss', 'hive-0.10.0'),
   (4.44, 'Gmean', 'jruby-1.1'),
   (11.11, 'AUC', 'jruby-1.4.0'),
   (6.67, 'BAL_ACC', 'jruby-1.5.0'),
   (20.69, 'LogLoss', 'lucene-2.3.0'),
   (2.56, 'LogLoss', 'lucene-2.9.0'),
   (26.67, 'AUC', 'lucene-3.0.0')],
  'DT': [(36.67, 'AUC', 'activemq-5.1.0'),
   (15.62, 'BAL_ACC', 'activemq-5.2.0'),
   (13.5, 'F1', 'activemq-5.3.0'),
   (10.53, 'LogLoss', 'derby-10.2.1.6'),
   (9.37, 'F1', 'derby-10.3.1.4'),
   (59.09, 'LogLoss', 'groovy-1_5_7'),
   (13.89, 'AUC', 'groovy-1_6_BETA_1'),
   (42.31, 'ACC', 'hbase-0.95.0'),
   (8.57, 'AUC', 'hive-0.9.0'),
   (3.85, 'ACC', 'hive-0.10.0'),
   (4.65, 'F1_W', 'jruby-1.1'),
   (25.81, 'ACC', 'jruby-1.4.

In [34]:
for metric, met_data in metrics_improvements.items():
    print(metric)
    for model, model_data in  met_data.items(): 
        print(model)
        print(min(model_data),'% -', np.mean(model_data) , max(model_data), '%')

MCC
RF
2.56 % - 12.036666666666667 26.67 %
DT
3.33 % - 18.574666666666666 59.09 %
KNN
5.0 % - 42.451428571428565 150.0 %
FFT
2.38 % - 20.697 57.89 %
G
RF
1.28 % - 11.604615384615384 102.94 %
DT
1.28 % - 6.735999999999998 27.12 %
KNN
1.69 % - 7.952 21.95 %
FFT
4.23 % - 5.722499999999999 8.45 %
F1
RF
4.92 % - 33.17999999999999 258.33 %
DT
1.89 % - 15.686875 55.17 %
KNN
3.23 % - 16.31230769230769 57.89 %
FFT
1.37 % - 22.900000000000002 94.87 %
AUC
RF
1.19 % - 1.7766666666666664 4.76 %
DT
1.22 % - 7.05 37.5 %
KNN
1.25 % - 2.72 6.85 %


In [None]:
RQ1_tables(DATA)

In [None]:
auc_data = DATA['MCC']['data']
selected_row=auc_data[(auc_data['file'] == 'activemq-5.0.0') & (auc_data['model'] == 'FFT') & (auc_data['scorer'] == 'LogLoss')].to_dict(orient='records')

In [None]:
plot_algo_scorers_heatmap(auc_data, algos=['RF', 'DT', 'KNN', 'FFT'], scorers=['MCC', 'Gmean', 'F1', 'F1_W', 'ACC', 'BAL_ACC', 'AUC', 'LogLoss'])

In [None]:

fig, axs = plt.subplots(nrows=1, ncols=4, figsize = (25, 5))
plot_metrics_scorers_exclusive_heatmaps({
    'AUC':  DATA['AUC']['data'],
    'MCC' : DATA['MCC']['data'],
    'Gmean' : DATA['G']['data'], 
    'F1' : DATA['F1']['data'],
},algos=['RF'], scorers=['MCC', 'Gmean', 'F1','F1_W','ACC', 'BAL_ACC',  'LogLoss', 'AUC']
, ax=axs[0], cbar=False)
axs[0].set_title("RF")

plot_metrics_scorers_exclusive_heatmaps({
    'AUC':  DATA['AUC']['data'],
    'MCC' : DATA['MCC']['data'],
    'Gmean' : DATA['G']['data'], 
    'F1' : DATA['F1']['data'],
},algos=['DT'], scorers=['MCC', 'Gmean', 'F1','F1_W','ACC', 'BAL_ACC', 'LogLoss', 'AUC']
,ax = axs[1], cbar=False)
axs[1].set_title("DT")
axs[1].set_ylabel("")
plot_metrics_scorers_exclusive_heatmaps({
    'AUC':  DATA['AUC']['data'],
    'MCC' : DATA['MCC']['data'],
    'Gmean' : DATA['G']['data'], 
    'F1' : DATA['F1']['data'],
},algos=['KNN'], scorers=['MCC', 'Gmean', 'F1','F1_W','ACC', 'BAL_ACC', 'LogLoss', 'AUC']
,ax = axs[2], cbar=False)
axs[2].set_title("KNN")
axs[2].set_ylabel("")


plot_metrics_scorers_exclusive_heatmaps({
    #'AUC':  DATA['AUC']['data'],
    'MCC' : DATA['MCC']['data'],
    'Gmean' : DATA['G']['data'], 
    'F1' : DATA['F1']['data'],
},algos=['FFT'], scorers=['MCC', 'Gmean', 'F1','F1_W','ACC', 'BAL_ACC']
,ax = axs[3])
axs[3].set_title("FFT")
axs[3].set_ylabel("")
plt.tight_layout()
with PdfPages('performance_tuning_metrics.pdf') as pdf:
    pdf.savefig()
    plt.close()





In [None]:
plot_corrolation(df=DATA['G']['data'], studied_algos=['KNN'])

In [None]:
plot_corrolation(df=DATA['G']['data'], studied_algos=['RF'])

In [None]:
fig_MCC = plot_per_metric('MCC',DATA['MCC']['data'] )
fig_G = plot_per_metric('G',DATA['G']['data'] )
fig_F1 = plot_per_metric('F1',DATA['F1']['data'] )
fig_AUC = plot_per_metric('AUC',DATA['AUC']['data'],studied_algos = ['RF', 'DT', 'KNN'] )

In [None]:
 fig, ax= plt.subplots()
 scatter_ranks_plot(DATA['F1']['data'], scorerX='BAL_ACC', scorerY='LogLoss', algo='RF', ax=ax)

In [None]:
plot_metric_corrolation(dfs = {
    'MCC': DATA['MCC']['data'],
    'F1' : DATA['F1']['data'],
    'Gmean': DATA['G']['data']#, 
    #'AUC' : DATA['AUC']['data']
    }
    ,
     scorer = 'ACC', algo='FFT', files = ALL_STUDIED_RELEASES)