In [1]:
import glob, os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from math import floor
import matplotlib

# Global matplotlib font settings applied to every figure in this notebook.
# 'normal' is a font style/weight value, not a font family: matplotlib cannot
# resolve it, logs "findfont: Font family ['normal'] not found" and falls back
# to its default face. Asking for the generic sans-serif family explicitly
# avoids the warnings.
font = {'family': 'sans-serif',
        'size': 14}

matplotlib.rc('font', **font)

def get_summary_df(path='./', metric='avgp', proportions=(0.1, 0.25, 0.5, 0.75, 1.0)):
    """Aggregate one metric over all runs, per dataset variant and proportion.

    For every value in ``proportions`` the folders matching
    ``<path>/results_celeba_*<proportion>/run.*`` are globbed, each run's
    ``result_summary_celeba.csv`` is read, and ``metric`` is summarised per
    ``dataset`` value across the runs.

    Parameters
    ----------
    path : str
        Folder that contains the ``results_celeba_*`` directories.
    metric : str
        Name of the CSV column to summarise.
    proportions : iterable of float
        Proportion values; ``str(prop)`` must be the suffix of the folder name.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``dataset`` with columns ``mean``, ``std`` (sample standard
        deviation across runs) and ``proportion``; one group of rows per
        proportion, stacked in the order given.

    Raises
    ------
    ValueError
        If no run folder matches for one of the proportions.
    """
    result = []
    for prop in proportions:
        pattern = path + "/results_celeba_*" + str(prop) + "/run.*"
        # `columns=` replaces the positional axis argument that pandas 2.0 removed.
        runs = [
            pd.read_csv(folder + '/result_summary_celeba.csv').drop(columns=['classifier'])
            for folder in glob.glob(pattern)
        ]
        if not runs:
            # pd.concat([]) would raise an unhelpful "No objects to concatenate".
            raise ValueError("no run folders match {!r}".format(pattern))

        # Select the metric before aggregating so unrelated (possibly
        # non-numeric) columns never reach mean()/std().
        stats = pd.concat(runs).groupby('dataset')[metric].agg(['mean', 'std'])
        stats['proportion'] = prop
        result.append(stats)

    return pd.concat(result)

def plot_results_clf(path='./', metrics=['avgp', 'auc', 'bacc'],
                     proportions=(0.1, 0.25, 0.5, 0.75, 1.0),
                     title='AUC Average Precision',
                     opts=['imbalanced', 'oversampled', 'augmented'],
                     width=0.04, baseline=None):
    """Draw grouped bar charts of mean +/- std scores against the imbalance ratio.

    Two layouts are produced:

    * ``'acc'`` in ``metrics``: a 2x3 grid with one panel per attribute folder
      of the built-in list. ``path`` is ignored in this mode and the unused
      sixth panel is removed.
    * otherwise: a single row of three panels, one per metric, for ``path``.

    Parameters
    ----------
    path : str
        Results folder to plot (only used when 'acc' is not in ``metrics``).
    metrics : list of str
        Metric columns handed to ``get_summary_df``.
    proportions : iterable of float
        Imbalance ratios; also used as the x tick positions.
    title : str
        Accepted but never read by this function.
    opts : list of str
        Dataset variants drawn as bars. Keys of the colour/label maps below.
    width : float
        Bar width, and the horizontal shift between neighbouring variants.
    baseline : str or None
        Optional dataset variant drawn as a dashed horizontal line at the
        average of its per-proportion means.
    """
    color_map = {'imbalanced': 'C0', 'oversampled': 'C1', 'augmented': 'C2',
                 'synthesized': 'C2', 'balanced': 'C4'}
    label_map = {'imbalanced': 'original', 'oversampled': 'oversampling',
                 'augmented': 'GAN sampling', 'synthesized': 'GAN sampling',
                 'balanced': 'original'}
    title_map = {'avgp': 'AUC Average Precision', 'auc': 'AUC ROC Curve',
                 'bacc': 'Balanced Accuracy', 'acc': 'Accuracy'}
    # NOTE(review): the last entry has no './' prefix, unlike the others.
    all_paths = ['./smiling', './attractive', './lipstick', './high_cheekbones', 'mouth_slightly_open']
    path_to_title = ['Smiling', 'Attractive', 'Wearing Lipstick', 'High Cheekbones', 'Mouth Slightly Open']

    accuracy_mode = 'acc' in metrics
    paths = all_paths if accuracy_mode else [path]
    # A lone variant sits on the tick; several variants start one bar-width to its left.
    offset = -1 if len(opts) != 1 else 0

    n_cols = 3
    n_rows = 2 if accuracy_mode else 1

    fig, axes = plt.subplots(n_rows, n_cols, sharey=True, figsize=(15, 5 * n_rows))
    if axes.ndim == 1:
        # Always index the grid as axes[row][col], even for a single row.
        axes = axes[np.newaxis, :]

    for k, attr_path in enumerate(paths):
        for j, metric in enumerate(metrics):
            stats = get_summary_df(attr_path, metric, proportions)
            # Panels are filled row by row, three per row.
            vidx, hidx = divmod(k + j, 3)
            ax = axes[vidx][hidx]

            for i, opt in enumerate(opts):
                rows = stats.loc[[opt]]
                ax.bar(rows['proportion'] + (i + offset) * width, rows['mean'], width,
                       yerr=rows['std'], label=label_map[opt], color=color_map[opt])

            if baseline:
                rows = stats.loc[[baseline]]
                ax.axhline(np.mean(rows['mean']), ls='--',
                           label=label_map[baseline], color=color_map[baseline])

            # Re-apply the current position unchanged (width factor is 1.0).
            box = ax.get_position()
            ax.set_position([box.x0, box.y0, box.width * 1.0, box.height])

            # y-limits are only set for multi-variant charts. The y axis is shared,
            # so every panel ends up with whichever limits were set last.
            if offset < 0:
                if attr_path in ('./smiling', './lipstick'):
                    ax.set_ylim(0.75, 1.0)
                else:
                    ax.set_ylim(0.4, 1.0)

            ax.set_xticks(proportions)
            if vidx == 0:
                # Only the first panel of the top row carries a y label.
                axes[0][0].set_ylabel('Accuracy' if accuracy_mode else 'Score')
            ax.set_xlabel('Imbalance Ratio')
            if accuracy_mode:
                ax.set_title(path_to_title[k + j])
            else:
                ax.set_title(title_map[metric])

    if accuracy_mode:
        # Anchor the legend to the right of the middle bottom panel, then drop
        # the last (empty) panel of the grid.
        axes[1][1].legend(loc='upper center', ncol=1, bbox_to_anchor=(1.4, 0.5))
        fig.delaxes(axes.flatten()[-1])
        plt.tight_layout()
    else:
        axes[0][1].legend(loc='upper left', bbox_to_anchor=(-0.4, 1.25), ncol=3)
    plt.show()

## Smiling

In [2]:
plot_results_clf('./smiling')

In [3]:
# NOTE: because 'acc' is in `metrics`, plot_results_clf ignores `path` and draws one
# panel per attribute folder in its built-in list; `title` is accepted but never used.
plot_results_clf(path='./smiling', metrics=['acc'], title='Accuracy', opts=['synthesized'], baseline='balanced', width=0.1)

## Attractive

In [4]:
plot_results_clf('./attractive')

## Lipstick

In [5]:
plot_results_clf('./lipstick')

## High Cheekbones

In [6]:
plot_results_clf('./high_cheekbones')

## Mouth Slightly Open

In [7]:
plot_results_clf('./mouth_slightly_open')

# Rank comparison

In [27]:
def get_ranks_for_expression(path, metric='avgp', proportions=[0.1, 0.25, 0.5, 0.75, 1.0]):
    """Rank the three sampling strategies against each other for one attribute.

    The summary from ``get_summary_df(path, metric)`` is restricted to the
    'imbalanced', 'oversampled' and 'augmented' rows and to ``proportions``.
    Within every proportion the strategies are ranked by their mean score,
    rank 1 being the highest mean.

    Parameters
    ----------
    path : str
        Results folder of the attribute, with or without a leading './'.
    metric : str
        Metric column to rank on.
    proportions : list of float
        Proportions to keep.

    Returns
    -------
    pandas.DataFrame
        The filtered summary rows plus a ``rank`` column and an ``attr`` column
        holding ``path`` without its leading './'.
    """
    summary = get_summary_df(path, metric).reset_index()
    keep = (summary['dataset'].isin(['imbalanced', 'oversampled', 'augmented'])
            & summary['proportion'].isin(proportions))
    # Work on an explicit copy so adding columns never writes into a slice.
    df = summary.loc[keep].copy()
    df['rank'] = df.groupby(['proportion'])['mean'].rank(ascending=False)
    # Only strip the './' prefix when it is really there; a blind path[2:]
    # turned 'mouth_slightly_open' into 'uth_slightly_open'.
    df['attr'] = path[2:] if path.startswith('./') else path
    return df

def get_ranks(paths=['./smiling', './attractive', './lipstick', './high_cheekbones', './mouth_slightly_open'], metric='avgp'):
    """Collect the per-proportion strategy ranks of several attributes.

    Calls ``get_ranks_for_expression(path, metric)`` for every entry of
    ``paths`` and stacks the results, in the order given, into one frame with
    a fresh 0..n-1 index.

    The last default path now carries the same './' prefix as the others; it
    names the same folder, but the downstream ``path[2:]`` label no longer
    loses its first two characters.
    """
    ranks = [get_ranks_for_expression(p, metric) for p in paths]
    return pd.concat(ranks, ignore_index=True)

def plot_ranks(paths=['./smiling', './attractive', './lipstick', './high_cheekbones', './mouth_slightly_open'], metrics=['avgp', 'auc', 'bacc'], title='AUC Average Precision'):
    """Plot the rank of each sampling strategy, averaged over attributes.

    One panel is drawn per metric. For every strategy the ranks returned by
    ``get_ranks(paths, metric)`` are averaged per proportion and plotted as a
    line against the imbalance ratio.

    Parameters
    ----------
    paths : list of str
        Attribute result folders forwarded to ``get_ranks``.
    metrics : list of str
        Metrics to plot; each needs an entry in the title map below.
    title : str
        Accepted but never read by this function.
    """
    opts = ['imbalanced', 'oversampled', 'augmented']
    color_map = {'imbalanced': 'C0', 'oversampled': 'C1', 'augmented': 'C2'}
    label_map = {'imbalanced': 'original', 'oversampled': 'oversampling',
                 'augmented': 'GAN sampling'}
    title_map = {'avgp': 'AUC Average Precision', 'auc': 'AUC ROC Curve',
                 'bacc': 'Balanced Accuracy', 'acc': 'Accuracy'}

    # squeeze=False always returns a 2-D array, so a single metric no longer
    # hands back a bare Axes object that cannot be indexed.
    fig, axes = plt.subplots(1, len(metrics), sharey=True,
                             figsize=(5 * len(metrics), 5), squeeze=False)
    axes = axes[0]

    for i, metric in enumerate(metrics):
        # Average only the 'rank' column: the frame also holds the string
        # 'attr' column, which a frame-wide mean() rejects in pandas >= 2.0.
        mean_rank = get_ranks(paths, metric).groupby(['dataset', 'proportion'])['rank'].mean()

        for opt in opts:
            per_proportion = mean_rank.loc[opt]
            axes[i].plot(per_proportion.index.values, per_proportion.values,
                         color=color_map[opt], label=label_map[opt])

        axes[i].set_title(title_map[metric])
        axes[i].set_xlabel('Imbalance Ratio')

    axes[0].set_ylabel('Ranking Score')
    # The legend is attached to the current (last) panel; the anchor offset was
    # chosen for the default three-panel layout.
    plt.legend(loc='center left', bbox_to_anchor=(-1.7, 1.2), ncol=3)
    plt.ylim([0.8, 3.2])
    plt.show()

In [28]:
plot_ranks()

In [10]:
from scipy.stats import friedmanchisquare
from scipy.stats import kruskal

def get_measurements(opts, metric, proportions):
    """Return one array of rank values per requested dataset variant.

    Ranks come from ``get_ranks(metric=metric)`` (default attribute paths) and
    are limited to rows whose ``proportion`` is in ``proportions``. The
    returned list follows the order of ``opts``; each array keeps the row
    order of the rank table.
    """
    rank_table = get_ranks(metric=metric)
    selected = rank_table[rank_table['proportion'].isin(proportions)]
    return [selected.loc[selected['dataset'] == name, 'rank'].values for name in opts]


def friedman_test(metric='avgp', proportions=[0.1, 0.25, 0.5]):
    """Friedman chi-square test across the three sampling strategies.

    Fetches the rank arrays of 'imbalanced', 'oversampled' and 'augmented'
    for ``metric`` at ``proportions`` and returns whatever
    ``scipy.stats.friedmanchisquare`` yields for them.
    """
    strategies = ['imbalanced', 'oversampled', 'augmented']
    rank_samples = get_measurements(strategies, metric, proportions)
    return friedmanchisquare(*rank_samples)

In [11]:
friedman_test()

In [12]:
friedman_test('auc')

In [13]:
friedman_test('bacc')

In [14]:
from scipy.stats import wilcoxon

def wilcoxon_test(opts, metric='avgp', proportions=[0.1, 0.25, 0.5]):
    """Wilcoxon signed-rank test between exactly two sampling strategies.

    ``opts`` must name two dataset variants (asserted). Their rank arrays for
    ``metric`` at ``proportions`` are passed, in that order, to
    ``scipy.stats.wilcoxon`` and its result is returned.
    """
    assert len(opts) == 2
    first, second = get_measurements(opts, metric, proportions)
    return wilcoxon(first, second)

In [15]:
wilcoxon_test(['imbalanced', 'augmented'])

In [16]:
wilcoxon_test(['oversampled', 'augmented'])

In [17]:
wilcoxon_test(['imbalanced', 'augmented'], 'auc')

In [18]:
wilcoxon_test(['oversampled', 'augmented'], 'auc')

In [19]:
wilcoxon_test(['imbalanced', 'augmented'], 'bacc')

In [20]:
wilcoxon_test(['oversampled', 'augmented'], 'bacc')