## Import

In [None]:
#General
import pandas as pd
import numpy as np
from numpy import std, mean, sqrt
import math
#Statistics
from scipy.stats import mannwhitneyu
#Visualization
from matplotlib import pyplot as plt
import matplotlib
#Utility
from tabulate import tabulate

In [None]:
#Custom imports
from stat_util import normal_test_all_model_samples
from stat_util import t_test_model_samples_against_other_model_samples
from stat_util import cohen_d_test_model_samples_against_other_model_samples

## Dataset preparation

In [None]:
# Dataset identifiers; each one is appended to a "results_promise/..." file
# prefix when the result files are loaded.
datasets = ["cm1", "jm1", "kc1", "kc2", "pc1"]

In [None]:
# Value of the "Model" column identifying the main model.
# NOTE(review): not referenced by any of the visible cells - confirm it is still needed.
main_model_name: str = "REPDX"

In [None]:
# Load one result file per dataset, print that dataset's per-model median
# F1 / Precision / Recall, and combine all rows (tagged with a "Dataset"
# column) into ``main_df``. ``main_df`` stays ``None`` when ``datasets`` is
# empty.
main_df = None

# Frames are collected here and concatenated once after the loop; calling
# pd.concat on every iteration re-copies all previously loaded rows each time.
dataset_frames = []

for dataset in datasets:
    print("Analyzing:", dataset)
    results_df = pd.read_csv("results_promise/balance_" + dataset)
    # Alternative result files (disabled):
    #results_df = pd.read_csv("results_promise/hp_"+dataset)
    #results_df = pd.read_csv("results_promise/hr_"+dataset)
    results_df = results_df.assign(Dataset=dataset)
    # Optional model renaming (disabled):
    #results_df.loc[results_df["Model"]=="OLD_REPD", 'Model'] = "REPD"
    #results_df.loc[results_df["Model"]=="REPD_EX", 'Model'] = "REPDX"
    #
    dataset_frames.append(results_df)

    # Optional: show the rows holding the best value of each measure (disabled).
    #print(results_df[results_df["F1 score"]==results_df["F1 score"].max()][["Model","F1 score","Precision","Recall"]])
    #print(results_df[results_df["Precision"]==results_df["Precision"].max()][["Model","F1 score","Precision","Recall"]])
    #print(results_df[results_df["Recall"]==results_df["Recall"].max()][["Model","F1 score","Precision","Recall"]])
    #print()
    #
    # Per-model medians for this dataset only.
    group_df = (
        results_df
        .drop(columns=['Accuracy', 'Accuracy base', 'Precision base', 'Recall base', 'F1 base score'])
        .rename(index=str, columns={"F1 score": "F1"})
        .groupby(["Model"])
    )
    print(group_df[["F1", "Precision", "Recall"]].median())
    #print(group_df["Precision"].median())
    #print(group_df["Recall"].median())
    print()
    #
    # Optional Mann-Whitney U comparison of REPDX against REPD (disabled);
    # enable exactly one of the three calls below.
    #stat, p = mannwhitneyu(
    #            results_df[results_df["Model"]=="REPDX"]["F1 score"].values, 
    #            results_df[results_df["Model"]=="REPD"]["F1 score"].values
    #            )
    #stat, p = mannwhitneyu(
    #            results_df[results_df["Model"]=="REPDX"]["Precision"].values, 
    #            results_df[results_df["Model"]=="REPD"]["Precision"].values
    #            )
    #stat, p = mannwhitneyu(
    #            results_df[results_df["Model"]=="REPDX"]["Recall"].values, 
    #            results_df[results_df["Model"]=="REPD"]["Recall"].values
    #            )
    #
    #print('Statistics=%.3f, p=%.3f' % (stat, p))
    #print()
    # interpret
    #alpha = 0.05
    #if p > alpha:
    #    print('Same distribution (fail to reject H0)')
    #else:
    #    print('Different distribution (reject H0)')
    #print()
    #print()

# pd.concat raises on an empty list, so only combine when something was loaded.
if dataset_frames:
    main_df = pd.concat(dataset_frames)


In [None]:
# Group the combined results by (Dataset, Model) after removing the accuracy
# and "base" columns and shortening "F1 score" to "F1".
group_df = (
    main_df
    .drop(columns=['Accuracy', 'Accuracy base', 'Precision base', 'Recall base', 'F1 base score'])
    .rename(index=str, columns={"F1 score": "F1"})
    .groupby(["Dataset", "Model"])
)

In [None]:
# Print the grouped medians as a LaTeX table; the single-measure variants
# are kept disabled.
#print(group_df[["F1"]].median().to_latex())
#print(group_df[["Precision"]].median().to_latex())
print(
    group_df[["Recall", "Precision"]]
    .median()
    .to_latex()
)



In [None]:
import seaborn as sns

In [None]:
# Global matplotlib font size applied to every figure drawn afterwards.
font = dict(size=58)
matplotlib.rc('font', **font)

def plot_measure(measure_name, plot_name):
    """Draw one violin panel per dataset for a measure and save it as a PDF.

    Reads the module-level ``main_df``. Datasets are taken in sorted order,
    one subplot row each; inside a panel there is one violin per model, with
    ``measure_name`` on the x axis and a padded "dataset, model" label on the
    y axis. The figure is saved to ``plot_name`` and then shown.

    Parameters
    ----------
    measure_name : str
        Column of ``main_df`` whose values are plotted (e.g. ``"F1 score"``).
    plot_name : str
        Output path; the figure is always written in PDF format.

    Raises
    ------
    ValueError
        If ``main_df`` contains no datasets to plot.
    """
    df = main_df

    # Sort both levels explicitly: iterating a bare set of strings gives an
    # order that can change between interpreter runs, which would reorder
    # the violins from one execution to the next.
    dataset_list = sorted(set(df["Dataset"].values))
    model_list = sorted(df["Model"].dropna().unique())

    if not dataset_list:
        raise ValueError("main_df contains no datasets to plot")

    # One long-format frame per dataset: a padded label plus the measure value.
    new_dfs = []
    for dataset in dataset_list:
        in_dataset = df["Dataset"] == dataset
        rows = [
            {
                # Labels are left-justified and padded to 26 characters.
                "DatasetModel": "{: <26}".format(", ".join([dataset, model])),
                measure_name: value,
            }
            for model in model_list
            for value in df.loc[in_dataset & (df["Model"] == model), measure_name].values
        ]
        new_dfs.append(pd.DataFrame(rows))

    # Plotting prepared dataset: one row per dataset. The height scales with
    # the number of panels and equals the original 64x128 figure for five
    # datasets. squeeze=False keeps ``axs`` two-dimensional even for one panel.
    n_panels = len(new_dfs)
    fig, axs = plt.subplots(
        n_panels,
        1,
        figsize=(64, 128 * n_panels / 5),
        sharex=True,
        sharey="row",
        gridspec_kw={"hspace": 0.1},
        squeeze=False,
    )

    for ax, new_df in zip(axs[:, 0], new_dfs):
        # x/y are passed by keyword; recent seaborn versions no longer accept
        # them positionally.
        # NOTE(review): newer seaborn releases rename ``scale`` to
        # ``density_norm``; kept as-is for the version this notebook targets.
        sns.violinplot(
            x=measure_name,
            y="DatasetModel",
            data=new_df,
            ax=ax,
            scale="count",
            cut=0,
        )
        #
        yax = ax.get_yaxis()
        yax.grid(True)
        yax.set_ticks_position("right")
        # Tick padding is set to the length of the longest label.
        pad = max(len(label) for label in new_df["DatasetModel"].values)
        yax.set_tick_params(pad=pad)
        yax.get_label().set_visible(False)
        #
        xax = ax.get_xaxis()
        xax.grid(True)
        xax.get_label().set_visible(False)
        #
        ax.set_title("")

    st = fig.suptitle(measure_name, fontsize="x-large")
    st.set_y(0.95)
    fig.subplots_adjust(top=0.93)
    fig.tight_layout()
    fig.savefig(plot_name, format="pdf", bbox_inches="tight")
    plt.show()

In [None]:
# NOTE(review): the output name says "hr" while the active loader reads
# "balance_" result files - confirm the name matches the loaded results.
plot_measure(measure_name="F1 score", plot_name="promise-wp-hr-f1.pdf")

In [None]:
# NOTE(review): the output name says "hr" while the active loader reads
# "balance_" result files - confirm the name matches the loaded results.
plot_measure(measure_name="Precision", plot_name="promise-wp-hr-precision.pdf")

In [None]:
# NOTE(review): the output name says "hr" while the active loader reads
# "balance_" result files - confirm the name matches the loaded results.
plot_measure(measure_name="Recall", plot_name="promise-wp-hr-recall.pdf")