In [None]:
# Root directory of the experiment outputs. Archive paths are built as
# RESULTS_FOLDER/<experiment name>/<split directory>/<suffix>.
# NOTE(review): the value looks like a placeholder — set it to the real location before running.
RESULTS_FOLDER = "/path/to/results/folder"

In [None]:
import os

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import scipy.stats
# Global matplotlib defaults for every figure created afterwards:
# serif font at size 24, and a cap size of 3 on error bars.
matplotlib.rc('font', family='serif', size=24)
matplotlib.rc('errorbar', capsize=3)

In [None]:
# Sub-path appended below every split directory: <run configuration folder>/<result archive>.
suffix = os.path.join(
    "inv_lambda-1.0_epsilon-0.01_apply_dp-True",
    "data_dp_round_1.npz",
)
def _load_client_data(exp_name):
    return {
        (0.125, 10): np.load(os.path.join(RESULTS_FOLDER, exp_name, "split_sym_neq_50x0.125_validation_fraction_0.2", suffix)),
        (0.25, 10): np.load(os.path.join(RESULTS_FOLDER, exp_name, "split_sym_neq_50x0.25_validation_fraction_0.2", suffix)),
        (0.5, 10): np.load(os.path.join(RESULTS_FOLDER, exp_name, "split_sym_neq_50x0.5_validation_fraction_0.2", suffix)),
        (0.75, 10): np.load(os.path.join(RESULTS_FOLDER, exp_name, "split_sym_neq_50x0.75_validation_fraction_0.2", suffix)),
        (1.0, 10): np.load(os.path.join(RESULTS_FOLDER, exp_name, "split_sym_eq_50_validation_fraction_0.2", suffix)),
        (1.5, 10): np.load(os.path.join(RESULTS_FOLDER, exp_name, "split_sym_neq_50x1.5_validation_fraction_0.2", suffix)),
        (2.0, 10): np.load(os.path.join(RESULTS_FOLDER, exp_name, "split_sym_neq_50x2_validation_fraction_0.2", suffix)),
        (4.0, 10): np.load(os.path.join(RESULTS_FOLDER, exp_name, "split_sym_neq_50x4_validation_fraction_0.2", suffix)),
    }
# Load every split's archive for both experiments; "adult" and "kdd99" are the
# experiment folder names looked up under RESULTS_FOLDER.
adult_client_data = _load_client_data("adult")
kdd_client_data = _load_client_data("kdd99")

In [None]:
def convert_width(p, w):
    """Convert a width given in log10 units into a linear width around ``p``.

    Args:
        p: Centre position on the linear scale (scalar or array).
        w: Total width in decades, split evenly on both sides of ``log10(p)``.

    Returns:
        ``10**(log10(p) + w/2) - 10**(log10(p) - w/2)``.
    """
    log_centre = np.log10(p)
    half_width = w / 2.
    upper_edge = 10 ** (log_centre + half_width)
    lower_edge = 10 ** (log_centre - half_width)
    return upper_edge - lower_edge


def _special_client_percentiles(all_scores, num_special_clients, with_spread=False):
    """Rank the mean score of the leading clients among all clients.

    Args:
        all_scores: 1-D array with one score per client.
        num_special_clients: Number of leading entries of ``all_scores`` to average.
        with_spread: If True, also rank ``mean - std`` and ``mean + std`` of those
            entries; otherwise both spreads are reported as 0.0.

    Returns:
        ``(mean_pct, lower_err, upper_err)`` as fractions in [0, 1]. ``mean_pct`` is
        the share of ``all_scores`` strictly below the mean; the two errors are the
        distances from ``mean_pct`` down/up to the ranks of ``mean -/+ std``.
    """
    # NOTE(review): assumes the special clients occupy the first entries — confirm.
    special_scores = all_scores[:num_special_clients]
    mean = np.mean(special_scores)

    def _rank(value):
        # percentileofscore returns a value in [0, 100]; rescale to a fraction.
        return scipy.stats.percentileofscore(all_scores, value, kind='strict') / 100

    mean_pct = _rank(mean)
    if not with_spread:
        return mean_pct, 0.0, 0.0
    std = np.std(special_scores)
    return mean_pct, mean_pct - _rank(mean - std), _rank(mean + std) - mean_pct


def plot(ax, label_to_client_data, title, series, ylabel: bool = False, legend = False,
         show_errorbars: bool = False):
    """Plot the score percentile of the special clients against relative dataset size.

    For each dataset in ``label_to_client_data`` one marker series is drawn on
    ``ax``: x is the relative dataset size (log scale), y is the percentile rank
    (as a fraction) of the special clients' mean score among all clients. The
    Pearson correlation between ``log10(x)`` and y is printed per dataset.

    Args:
        ax: Matplotlib axes to draw on; its scales, ticks and bounds are configured here.
        label_to_client_data: Mapping from legend label to a mapping
            ``(relative_size, num_special_clients) -> data`` where ``data[series]``
            holds one score per client.
        title: Axes title.
        series: Key of the score array to read from each ``data`` object.
        ylabel: If True, label the y axis.
        legend: If True, draw the legend to the right of the axes.
        show_errorbars: If True, add asymmetric error bars at the percentile ranks
            of ``mean -/+ std`` of the special clients' scores. Off by default.
    """
    ax.set_title(title)
    ax.grid(which='both', axis='both')
    ax.minorticks_on()

    # Log-scaled x axis with fixed bounds; only the major ticks carry labels.
    ax.set_xscale('log')
    ax.set_xlabel("Relative Dataset Size")
    ax.set_xbound(0.0625, 8.0)
    ax.set_xticks([0.25, 1.0, 4.0], minor=False)
    ax.set_xticks([0.125, 0.5, 2.0], minor=True)
    ax.xaxis.set_minor_formatter(plt.NullFormatter())
    ax.set_autoscalex_on(False)
    ax.set_xticklabels(['{:.2f}'.format(x) for x in ax.get_xticks()])

    # y axis fixed to [0, 1] and labelled as percentages.
    if ylabel:
        ax.set_ylabel("Score\n %tile")
    ax.set_ybound(0.0, 1.0)
    ax.set_yticks([0, 0.5, 1.0], minor=False)
    ax.set_yticks([0.25, 0.75], minor=True)
    ax.set_autoscaley_on(False)
    ax.set_yticklabels(['{:,.0%}'.format(x) for x in ax.get_yticks()])

    # The first two formats keep the established look; further formats are
    # cycled so that extra datasets are still drawn instead of being dropped.
    fmts = ['gD', 'ro', 'bs', 'k^', 'mv', 'c*']
    lw = 1

    for index, (label, client_data) in enumerate(label_to_client_data.items()):
        xs = []
        means_pct = []
        lower_pct_errors = []
        upper_pct_errors = []
        for (x, num_special_clients), data in client_data.items():
            all_scores = np.squeeze(data[series])
            mean_pct, lower_err, upper_err = _special_client_percentiles(
                all_scores, num_special_clients, with_spread=show_errorbars)
            xs.append(x)
            means_pct.append(mean_pct)
            lower_pct_errors.append(lower_err)
            upper_pct_errors.append(upper_err)
        xs = np.array(xs)

        correlation = scipy.stats.pearsonr(np.log10(xs), means_pct)
        print("correlation", correlation)

        errorbar_kwargs = {}
        if show_errorbars:
            # Asymmetric errors: first row is below the marker, second row above.
            errorbar_kwargs['yerr'] = [lower_pct_errors, upper_pct_errors]
        ax.errorbar(xs, means_pct, fmt=fmts[index % len(fmts)], label=label,
                    markersize=12, linewidth=lw, **errorbar_kwargs)
    if legend:
        ax.legend(loc='center left', bbox_to_anchor=(1.1, 0.5))

In [None]:
# One row of three panels sharing the y axis: model, evaluation and overall
# score percentiles for both datasets.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(22,4), sharey=True)
label_to_client_data = {
    "Adult": adult_client_data,
    "KDD": kdd_client_data
}
# Only the left panel carries the y label and only the right panel the legend.
plot(ax1, label_to_client_data, title="Model Score ($m_k$)", series="model_scores", ylabel=True)
plot(ax2, label_to_client_data, title="Evaluation Score ($d_k$)", series="auditor_score")
plot(ax3, label_to_client_data, title="Overall Score ($p_k$)", series="overall_score", legend=True)

plt.tight_layout()
# Save before showing so the written file contains the finished figure.
plt.savefig("exp2.pdf")
# Render the figure explicitly; a bare `plt` expression only echoes the module repr.
plt.show()