In [None]:
import numpy as np
import pandas as pd
import os 
import glob
import pdb
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
import scipy as sp
from scipy.special import expit
import sys

In [None]:
# Put the parent of the current working directory on the import path so the
# `visualisation_and_evaluation` package imported below can be resolved.
# NOTE(review): this depends on the directory the kernel was started from --
# presumably the notebook lives inside that package directory; confirm.
sys.path.append(os.path.dirname(os.getcwd()))
# Project-local helpers: score extraction / run selection and the plotting routine.
from visualisation_and_evaluation.helpers_eval import extract_scores, wa, select_best_run
from visualisation_and_evaluation.helpers_vizualisation import plot_scores

In [None]:
# Load the baseline scores and, for every other "full" score file, the single
# best run, then combine everything into one table (`eval_all`).
wd = os.getcwd()
# Directory with the score CSVs, located two levels above the working directory.
path_scores = os.path.abspath(os.path.join(wd,"..","..","data/Dataset5/eval_scores"))

# The baseline file defines the column layout of the combined table.
baseline_fname = 'scores_baselines_full.csv'

# Sorted so the processing order does not depend on the filesystem's listing order.
files = sorted(x for x in os.listdir(path_scores) if 'full' in x and 'score' in x)
selection_method = 'wa' #'wa' (weighted average), 'div' (based on divergence score only)

# os.path.abspath() returns a path without a trailing separator, so the file
# name has to be joined onto the directory rather than string-concatenated.
eval_all = pd.read_csv(os.path.join(path_scores, baseline_fname), index_col=[0])
# Negated copy of the silhouette score, used by the plots and the weighted average.
eval_all['silhouette_score_neg'] = -eval_all['silhouette_score']
baseline_columns = eval_all.columns

# Collect the pieces first and concatenate once: DataFrame.append() was removed
# in pandas 2.0, and growing a frame inside a loop copies it on every iteration.
score_frames = [eval_all]
for fname in files:
    if fname == baseline_fname:
        # already loaded above
        continue
    df = extract_scores(path_scores, fname)
    # select "the best run"
    df_best = select_best_run(df, method=selection_method)
    # keep only the baseline columns, in the baseline order
    score_frames.append(df_best.loc[:, baseline_columns])

eval_all = pd.concat(score_frames)
eval_all = eval_all.sort_values(['sample', 'method']).reset_index(drop=True)


In [None]:
# Append a 'wa' column: the project helper `wa` is applied to each row of the
# three score columns listed below (axis=1 -> one call per row).
wa_inputs = ['divergence_score', 'entropy_score', 'silhouette_score_neg']
eval_all['wa'] = eval_all[wa_inputs].apply(wa, axis=1)
# Display the combined table, now including the 'wa' column.
eval_all


In [None]:
### plot scores
# Figure 1 -- all rows of eval_all.
# Left panel: entropy_score against divergence_score (legend switched off).
# Right panel: silhouette_score_neg against divergence_score.
f, axes = plt.subplots(nrows=1, ncols=2, figsize=[10,4], sharex=True)
left_ax, right_ax = axes
plot_scores(eval_all, xcol='divergence_score', ycol='entropy_score', ax=left_ax, legend=False)
plot_scores(eval_all, xcol='divergence_score', ycol='silhouette_score_neg', ax=right_ax)
# Saving of figure 1 is currently disabled; uncomment to write it next to the scores:
# plt.savefig(os.path.join(path_scores,
#                          'summary_scores_all_samples.png'),  bbox_inches='tight')

# Figure 2 -- same two panels, restricted to the rows whose 'sample' is 'sample5'.
is_sample5 = eval_all['sample'] == 'sample5'
eval_sample5 = eval_all.loc[is_sample5, :]
title5 = 'Evaluation scores: sample5'
f, axes = plt.subplots(nrows=1, ncols=2, figsize=[10,4], sharex=True)
left_ax, right_ax = axes
plot_scores(eval_sample5, xcol='divergence_score', ycol='entropy_score', title=title5, ax=left_ax, legend=False)
plot_scores(eval_sample5, xcol='divergence_score', ycol='silhouette_score_neg', title=title5, ax=right_ax)

In [None]:
# Scatter of the weighted-average score: one point per row of eval_all,
# colour-coded by 'method', marker shape chosen by 'sample'.
score_plot = sns.scatterplot(
    data=eval_all,
    x='method',
    y='wa',
    hue='method',
    style='sample',
    legend='brief',
    ax=None,
)
score_plot.set_ylabel('Weighted average of scores')
# No x tick marks/labels: the method is identified through the legend instead.
score_plot.set_xticks([])

# Re-create the legend outside the axes, anchored just right of the plot area
# (loc=2 is matplotlib's numeric code for 'upper left').
handles, names = score_plot.get_legend_handles_labels()
score_plot.legend(handles, names, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

# Write the figure into the scores directory.
wa_figure_path = os.path.join(path_scores, 'summary_scores_all_samples_wa.png')
plt.savefig(wa_figure_path, bbox_inches='tight')