In [None]:
# imports
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# NOTE: Change `<PATH_TO_TEX>` to an appropriate texlive installation
# add latex
import os

# Directory with the TeX binaries; needed because `text.usetex` is enabled below.
tex_bin_dir = '<PATH_TO_TEX>/bin/x86_64-linux'
# Append only once so that re-running this cell does not keep growing PATH.
if tex_bin_dir not in os.environ["PATH"].split(os.pathsep):
    os.environ["PATH"] += os.pathsep + tex_bin_dir

# Every figure saved by this notebook is written below this directory.
img_dir = 'results/cv_images/'
os.makedirs(img_dir, exist_ok=True)

# graphing: render all figure text through LaTeX (Libertine packages loaded in the preamble)
latex_preamble = r'\usepackage{libertine}\usepackage[libertine]{newtxmath} \usepackage{sfmath}'
plt.rcParams.update({
    'font.size': 13,
    'text.usetex': True,
    'text.latex.preamble': latex_preamble,
    'font.family': 'sans-serif',
})

# 21 evenly spaced samples of the `jet` colormap, stored as hex strings (alpha kept)
palette = plt.cm.jet(np.linspace(0, 1, 21))
all_colors = [matplotlib.colors.to_hex(rgba, keep_alpha=True) for rgba in palette]

markers = ['s', '*', 'o', '^', 'p', '1', 'P', 'X']
# default five-color selection, given as indices into `all_colors`
colors = [all_colors[idx] for idx in (-1, 5, -3, 7, 15)]

# (internal model id, display label) pairs; the two lists below stay index-aligned
model_catalog = [
    ('DSynthPB', 'PrivBayes (DS)'),
    ('NIST_MST', 'MST (NIST)'),
    ('DPWGAN', 'DPWGAN (NIST)'),
    ('DPartPB', 'PrivBayes (Hazy)'),
    ('MST', 'MST (Smartnoise)'),
    ('DPWGANCity', 'DPWGAN (Synthcity)'),
]
model_names = [name for name, _ in model_catalog]
model_name_labels = [label for _, label in model_catalog]

In [None]:
# plot vertical bars
def plot_barv(ax, resultss, labels, upper_bound=None, title=None, xticklabels=None, capsize=3):
    """Draw grouped vertical bars of empirical epsilon onto ``ax``.

    Every DataFrame in ``resultss`` becomes one bar series (one color, one
    legend entry); the series are placed side by side around each x tick.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw on.
    resultss : sequence of pandas.DataFrame
        Between 1 and 6 frames with the columns ``model``, ``theor_eps``,
        ``emp_eps_mean`` and ``emp_eps_std``. Rows are ordered by ``model``
        (following the module-level ``model_names`` order) and, within one
        model, by ascending ``theor_eps``. Each frame is expected to hold one
        row per x tick.
    labels : sequence of str
        Legend label of each frame in ``resultss``.
    upper_bound : float, optional
        When given, a dashed red horizontal line is drawn at this height.
    title : str, optional
        Axes title.
    xticklabels : sequence of str, optional
        Tick labels; defaults to the module-level ``model_name_labels``.
        Spaces in the labels are replaced by line breaks.
    capsize : float
        Cap size of the ``emp_eps_std`` error bars.

    Raises
    ------
    ValueError
        If ``resultss`` does not contain between 1 and 6 frames.
    """
    if xticklabels is None:
        xticklabels = model_name_labels
    xticks = np.arange(len(xticklabels))

    # bar width and colors are hand-picked for each supported number of series
    series_styles = {
        1: (0.4, [colors[4]]),
        2: (0.4, [colors[4], colors[1]]),
        3: (0.25, [colors[4], colors[1], all_colors[-1]]),
        4: (0.2, [colors[4], colors[1], all_colors[-1], colors[3]]),
        5: (0.15, [all_colors[15], all_colors[5], all_colors[-3], all_colors[7], all_colors[-1]]),
        6: (0.125, [all_colors[15], all_colors[5], all_colors[-3], all_colors[7], all_colors[-1], all_colors[16]]),
    }
    n_series = len(resultss)
    if n_series not in series_styles:
        raise ValueError(f'plot_barv supports 1 to 6 result sets, got {n_series}')
    width, curr_colors = series_styles[n_series]

    # sort all results: by theor_eps first, then by model with a *stable* sort
    # so the theor_eps order is kept within each model (the default quicksort
    # gives no such guarantee)
    model_rank = {model_name: i for i, model_name in enumerate(model_names)}
    sorted_resultss = [
        results.sort_values(by=['theor_eps']).sort_values(
            by=['model'], key=lambda col: col.map(model_rank), kind='mergesort')
        for results in resultss
    ]

    # offsets of the series relative to the tick, centered on it
    positions = [(i - (n_series - 1) / 2) * width for i in range(n_series)]

    # bars start at -offset and reach up to emp_eps_mean, so a mean of 0 still
    # draws a bar of height `offset`
    offset = 0.25

    for pos, results, color, label in zip(positions, sorted_resultss, curr_colors, labels):
        ax.bar(xticks + pos, results['emp_eps_mean'] + offset, width=width, bottom=-offset, zorder=2, color=color, label=label, yerr=results['emp_eps_std'], capsize=capsize)

    ax.set_xticks(xticks)
    ax.set_xticklabels([label.replace(' ', '\n') for label in xticklabels])

    ax.set_ylim(-offset, 30)
    ax.set_ylabel('Empirical $\\varepsilon_{emp}$')
    ax.grid(color='#DCDCDC', linestyle='-', linewidth=1, zorder=0)

    if upper_bound is not None:
        # span the current x range, then restore it so the line does not widen the axes
        x_l, x_r, _, _ = ax.axis()
        ax.plot([x_l, x_r], [upper_bound, upper_bound], linestyle='--', color='red', label='Theoretical $\\varepsilon$')
        ax.set_xlim(x_l, x_r)

    if title is not None:
        ax.set_title(title)

In [None]:
# plot horizontal bars
def plot_barh(ax, resultss, labels, upper_bound=None, title=None, capsize=3):
    """Draw two side-by-side horizontal bar series of empirical epsilon onto ``ax``.

    There is one group of bars per entry of the module-level ``model_names``;
    the y tick labels come from ``model_name_labels`` (spaces become line
    breaks).

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw on.
    resultss : sequence of pandas.DataFrame
        Exactly two frames with the columns ``model``, ``theor_eps``,
        ``emp_eps_mean`` and ``emp_eps_std``. Rows are ordered by ``model``
        (following ``model_names``) and, within one model, by ascending
        ``theor_eps``. Each frame is expected to hold one row per model.
    labels : sequence of str
        Legend label of each frame in ``resultss``.
    upper_bound : float, optional
        When given, a dashed red vertical line is drawn at this x value.
    title : str, optional
        Axes title.
    capsize : float
        Cap size of the ``emp_eps_std`` error bars.

    Raises
    ------
    ValueError
        If ``resultss`` does not contain exactly two frames (the only layout
        defined for this plot).
    """
    if len(resultss) != 2:
        raise ValueError(f'plot_barh supports exactly 2 result sets, got {len(resultss)}')
    positions = [-0.2, +0.2]
    height = 0.4
    curr_colors = [colors[4], colors[1]]

    # sort all results: by theor_eps first, then by model with a *stable* sort
    # so the theor_eps order is kept within each model (the default quicksort
    # gives no such guarantee)
    model_rank = {model_name: i for i, model_name in enumerate(model_names)}
    sorted_resultss = [
        results.sort_values(by=['theor_eps']).sort_values(
            by=['model'], key=lambda col: col.map(model_rank), kind='mergesort')
        for results in resultss
    ]

    n_models = len(model_names)

    # bars start at -offset and reach up to emp_eps_mean, so a mean of 0 still
    # draws a bar of length `offset`
    offset = 0.25

    for pos, results, color, label in zip(positions, sorted_resultss, curr_colors, labels):
        ax.barh(np.arange(n_models) + pos, results['emp_eps_mean'] + offset, height=height, left=-offset, zorder=2, color=color, label=label, xerr=results['emp_eps_std'], capsize=capsize)

    ax.set_yticks(np.arange(n_models))
    ax.set_yticklabels([label.replace(' ', '\n') for label in model_name_labels])

    ax.set_xlim(-offset, 30)
    ax.set_xlabel('Empirical $\\varepsilon_{emp}$')
    ax.grid(color='#DCDCDC', linestyle='-', linewidth=1, zorder=0)

    if upper_bound is not None:
        # span the current y range, then restore it so the line does not stretch the axes
        _, _, y_l, y_r = ax.axis()
        ax.plot([upper_bound, upper_bound], [y_l, y_r], linestyle='--', color='red', label='Theoretical $\\varepsilon$')
        ax.set_ylim(y_l, y_r)

    if title is not None:
        ax.set_title(title)

## Comparing empirical epsilons of Querybased vs DCR

In [None]:
# One two-panel figure (Adult, Fire) per theoretical epsilon, comparing the
# query-based and the DCR black-box attacks.
results = pd.read_csv('results/cv_shuffle_results/qb_vs_dcr/results_agg.csv')

attack_types = ['bb_querybased', 'bb_dcr']
for i, eps in enumerate([1.0, 4.0]):
    fig, axs = plt.subplots(1, 2)
    fig.subplots_adjust(wspace=0.1)

    for data_name, ax in zip(['Adult', 'Fire'], axs.flat):
        # rows of this panel: current dataset and current epsilon
        in_panel = (results['data_name'] == data_name.lower()) & (results['theor_eps'] == eps)
        resultss = [results[(results['attack_type'] == attack_type) & in_panel] for attack_type in attack_types]
        plot_barh(ax, resultss, ['Querybased', 'DCR'], upper_bound=eps, title=data_name)

        ax.label_outer()

    # only the first figure gets the shared legend, taken from the right-hand panel
    if i == 0:
        handles, legend_labels = axs.flat[1].get_legend_handles_labels()
        fig.legend(handles, legend_labels, loc='upper center', ncol=len(handles), bbox_to_anchor=(0.5, 1.05))

    fig.set_size_inches(10, 5)
    fig.savefig(f'{img_dir}/qb_vs_dcr_eps{eps}.pdf', bbox_inches='tight')

In [None]:
import dill

# NOTE(review): dill, like pickle, can execute arbitrary code while loading;
# only open score files from a trusted source.
# Use a context manager so the file handle is closed right after loading.
with open('results/qb_vs_dcr.dill', 'rb') as scores_file:
    scoresss = dill.load(scores_file)

# One two-panel figure (Adult, FIRE) per attack type, showing the score
# histograms of the two classes.
for i, (attack_type, attack_type_name) in enumerate(zip(['querybased', 'dcr'], ['Querybased', 'DCR'])):
    fig, axs = plt.subplots(1, 2)
    fig.subplots_adjust(wspace=0.1)
    for data_name, data_title, ax in zip(['adult', 'fire'], ['Adult', 'FIRE'], axs.flat):
        scoress = scoresss[data_name][attack_type]

        # column 0 holds the score, column 1 the class flag (1 -> plotted as D,
        # 0 -> plotted as D'); only the last 2000 rows of each class are used
        scores_in = scoress[scoress[:, 1] == 1][-2000:, 0]
        scores_out = scoress[scoress[:, 1] == 0][-2000:, 0]

        if attack_type == 'dcr':
            # one bin edge per distinct in-score plus a final edge at 0
            # NOTE(review): assumes DCR scores are <= 0 so the edges stay increasing - confirm
            bins = np.unique(scores_in).tolist() + [0]
        else:
            # ten equal-width bins on [0, 1]
            bins = (np.arange(11) / 10).tolist()

        # raw string: `\,` is LaTeX spacing, not a Python escape sequence
        ax.hist(scores_out, alpha=0.5, color=colors[1], label=r"$D\,'$", bins=bins)
        ax.hist(scores_in, alpha=0.5, color=colors[4], label='$D$', bins=bins)

        ax.set_ylim(0, 2000)

        ax.set_title(data_title)

        ax.set_xlabel('Score')
        ax.set_ylabel('Frequency')

        ax.label_outer()

    fig.set_size_inches(10, 5)

    # only the first figure gets the shared legend, taken from the right-hand panel
    if i == 0:
        h,l = axs.flat[1].get_legend_handles_labels()
        fig.legend(h, l, loc='upper center', ncol=len(h), bbox_to_anchor=(0.5, 1.05))

    fig.savefig(f'{img_dir}/qb_vs_dcr_scores_{attack_type}.pdf', bbox_inches='tight')

## Comparing empirical epsilons of worst-case datasets (black-box)

In [None]:
# Black-box (query-based) attack on the four worst-case dataset variants;
# one bar series per variant, theoretical epsilon drawn at 4.0.
results = pd.read_csv('results/cv_shuffle_results/test_worst_case/results_agg.csv')

is_bb_querybased = results['attack_type'] == 'bb_querybased'
dataset_types = ['worstcase', 'worstcase_narrow', 'worstcase_repeat', 'worstcase_narrow_repeat']
resultss = [results[(results['dataset_type'] == dataset_type) & is_bb_querybased] for dataset_type in dataset_types]

fig, ax = plt.subplots()
plot_barv(ax, resultss, ['small', 'small+narrow', 'small+repeat', 'small+narrow+repeat'], upper_bound=4.0)

# zoom in: plot_barv sets the y axis up to 30 by default
ax.set_ylim(-0.25, 5)

handles, legend_labels = ax.get_legend_handles_labels()
fig.legend(handles, legend_labels, loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.05))

fig.set_size_inches(10, 5)
fig.savefig(f'{img_dir}/compare_worstcase_datasets.pdf', bbox_inches='tight')

## Comparing white-box vs black-box attack

In [None]:
# Black-box vs white-box attack, using one specific worst-case dataset variant per model.
results = pd.read_csv('results/cv_shuffle_results/test_worst_case/results_agg.csv')

# dataset variant kept for each model
dsynth_rows = (results['model'] == 'DSynthPB') & (results['dataset_type'] == 'worstcase_narrow')
dpart_rows = (results['model'] == 'DPartPB') & (results['dataset_type'] == 'worstcase_narrow_repeat')
other_rows = results['model'].isin(['NIST_MST', 'MST', 'DPWGAN', 'DPWGANCity']) & (results['dataset_type'] == 'worstcase_repeat')
results = results[dsynth_rows | dpart_rows | other_rows]

resultss = [results[results['attack_type'] == attack_type] for attack_type in ['bb_querybased', 'wb']]

fig, ax = plt.subplots()
plot_barv(ax, resultss, ['Black-box', 'White-box'], upper_bound=4.0)

# zoom in: plot_barv sets the y axis up to 30 by default
ax.set_ylim(-0.25, 6)

handles, legend_labels = ax.get_legend_handles_labels()
fig.legend(handles, legend_labels, loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1))

fig.set_size_inches(10, 5)
fig.savefig(f'{img_dir}/bb_vs_wb_worstcase.pdf', bbox_inches='tight')

## Plotting empirical epsilons for various theoretical $\varepsilon$ values on worst-case datasets

In [None]:
# Empirical epsilon of every model (one bar series each) at several theoretical epsilons.
fig, ax = plt.subplots()

title = '(Implementation-specific) Worst-case dataset'

results = pd.read_csv('results/cv_shuffle_results/worst_case_wb/results_agg.csv')

resultss = [results[(results['model'] == model_name)] for model_name in model_names[:-1]] # exclude DPWGAN (Synthcity)
plot_barv(ax, resultss, model_name_labels[:-1], upper_bound=None, title=title, xticklabels=['$1.0$', '$2.0$', '$4.0$', '$10.0$'])

# plot_barv orders the bars by ascending theor_eps, so the epsilons are sorted
# as well: tick i then gets the reference line of the i-th smallest epsilon
# regardless of the row order in the CSV
epses = np.sort(results['theor_eps'].unique())
for i, eps in enumerate(epses):
    # short dashed segment over the bar group; only the first one carries the legend label
    ax.plot([i - 0.45, i + 0.45], [eps, eps], color='red', linestyle='--', label='Theoretical $\\varepsilon$' if i == 0 else None)

ax.set_xlabel('Theoretical $\\varepsilon$')
ax.set_ylim(-0.25, 11.0)

ax.set_yticks(np.arange(12))

ax.legend()

fig.set_size_inches(10, 5)
fig.savefig(f'{img_dir}/worstcase_audit_multeps.pdf', bbox_inches='tight')

## Plotting white-box vs active white-box attack for DPWGAN

In [None]:
# Black-box, white-box and active white-box attacks at several theoretical epsilons.
fig, ax = plt.subplots()

results = pd.read_csv('results/cv_shuffle_results/active_wb/results_agg.csv')

resultss = [results[(results['attack_type'] == attack_type)] for attack_type in ['bb_querybased', 'wb', 'active_wb']]
plot_barv(ax, resultss, ['Black-box', 'White-box', 'Active White-box'], upper_bound=None, title='DPWGAN (NIST)', xticklabels=['$1.0$', '$2.0$', '$4.0$', '$10.0$'])

# plot_barv orders the bars by ascending theor_eps, so the epsilons are sorted
# as well: tick i then gets the reference line of the i-th smallest epsilon
# regardless of the row order in the CSV
epses = np.sort(results['theor_eps'].unique())
for i, eps in enumerate(epses):
    # short dashed segment over the bar group; only the first one carries the legend label
    ax.plot([i - 0.4, i + 0.4], [eps, eps], color='red', linestyle='--', label='Theoretical $\\varepsilon$' if i == 0 else None)

ax.set_xlabel('Theoretical $\\varepsilon$')
ax.set_ylim(-0.25, 11.0)
ax.set_yticks(np.arange(12))

ax.legend()

fig.set_size_inches(10, 5)
fig.savefig(f'{img_dir}/wb_vs_active_wb.pdf', bbox_inches='tight')

## DataSynthesizer v0.1.4
- wrong noise scale bug

In [None]:
# calculate maximum epsilon limit
from dp_utils import compute_eps_lower_single
from privacy_estimates import AttackResults

# An attack with no errors at all: half of the observations counted as true
# positives, the other half as true negatives.
n_obs = 2000
perfect_attack = AttackResults(FP=0, FN=0, TP=n_obs // 2, TN=n_obs // 2)

# NOTE(review): 0.1 and 0 are presumably the confidence level and delta - confirm against dp_utils.
# The value is left as the last expression so the notebook displays it.
compute_eps_lower_single(perfect_attack, 0.1, 0, method='cp')

In [None]:
# White-box audit of DSynthPB (results read from the ds_0.1.4 directory) at several theoretical epsilons.
fig, ax = plt.subplots()

results = pd.read_csv('results/cv_shuffle_results/ds_0.1.4/results_agg.csv')

resultss = [results[results['model'] == model_name] for model_name in ['DSynthPB']]
plot_barv(ax, resultss, ['White-box'], upper_bound=None, title='PrivBayes (DS) v0.1.4', xticklabels=['$1.0$', '$2.0$', '$4.0$', '$10.0$'])

# plot_barv orders the bars by ascending theor_eps, so the epsilons are sorted
# as well: tick i then gets the reference line of the i-th smallest epsilon
# regardless of the row order in the CSV
epses = np.sort(results['theor_eps'].unique())
for i, eps in enumerate(epses):
    # short dashed segment over the bar; only the first one carries the legend label
    ax.plot([i - 0.4, i + 0.4], [eps, eps], color='red', linestyle='--', label='Theoretical $\\varepsilon$' if i == 0 else None)

# NOTE(review): 5.60 is hard-coded; presumably the value printed by the
# compute_eps_lower_single cell above - confirm if n_obs or the method changes.
# The line spans the current x range, which is restored afterwards.
x_l, x_r, _, _ = ax.axis()
ax.plot([x_l, x_r], [5.60, 5.60], linestyle='-.', color='black', label='Maximum auditable $\\varepsilon$')
ax.set_xlim(x_l, x_r)

ax.set_xlabel('Theoretical $\\varepsilon$')
ax.set_ylim(-0.25, 10.0)
ax.set_yticks(np.arange(12))

ax.legend()

fig.set_size_inches(10, 5)
fig.savefig(f'{img_dir}/ds_0.1.4.pdf', bbox_inches='tight')

## DPWGAN
- early stopping bug

In [None]:
# DPWGAN under black-box, white-box and active white-box attacks at several theoretical epsilons.
fig, ax = plt.subplots()

results = pd.read_csv('results/cv_shuffle_results/dpwgan_bug/results_agg.csv')

resultss = [results[(results['model'] == 'DPWGAN') & (results['attack_type'] == attack_type)] for attack_type in ['bb_querybased', 'wb', 'active_wb']]
plot_barv(ax, resultss, ['Black-box', 'White-box', 'Active White-box'], upper_bound=None, title='DPWGAN (NIST)', xticklabels=['$0.1$', '$0.4$', '$1.0$', '$4.0$'])

# plot_barv orders the bars by ascending theor_eps, so the epsilons are sorted
# as well: tick i then gets the reference line of the i-th smallest epsilon
# regardless of the row order in the CSV
epses = np.sort(results['theor_eps'].unique())
for i, eps in enumerate(epses):
    # short dashed segment over the bar group; only the first one carries the legend label
    ax.plot([i - 0.4, i + 0.4], [eps, eps], color='red', linestyle='--', label='Theoretical $\\varepsilon$' if i == 0 else None)

ax.set_xlabel('Theoretical $\\varepsilon$')
ax.set_ylim(-0.25, 45.0)
ax.set_yticks([0, 5, 10, 15, 20, 25, 30, 35, 40, 45])

ax.legend()

fig.set_size_inches(10, 5)
fig.savefig(f'{img_dir}/dpwgan_bug.pdf', bbox_inches='tight')

## Extras

### PrivBayes White-box Improvements

In [None]:
# Two panels (one per PrivBayes implementation) comparing the white-box attack
# with the `wb_vals` and the `wb_errors+sum` features.
fig, axs = plt.subplots(1, 2)

results = pd.read_csv('results/cv_shuffle_results/feat_types_epses/results_agg.csv')

# Model ids and titles must follow the model_names / model_name_labels pairing
# defined at the top of the notebook: DSynthPB <-> 'PrivBayes (DS)' and
# DPartPB <-> 'PrivBayes (Hazy)'. They were previously zipped in opposite
# orders, which put each title on the other implementation's data.
for model_name, model_name_label, ax in zip(['DSynthPB', 'DPartPB'], ['PrivBayes (DS)', 'PrivBayes (Hazy)'], axs.flat):
    resultss = [results[(results['model'] == model_name) & (results['attack_type'] == attack_type)] for attack_type in ['wb_vals', 'wb_errors+sum']]
    # raw strings: `\m` is LaTeX, not a Python escape sequence
    plot_barv(ax, resultss, [r'$\mathcal{F}_{naive}$', r'$\mathcal{F}_{error}$'], upper_bound=None, title=model_name_label, xticklabels=['$1.0$', '$2.0$', '$4.0$', '$10.0$'])

    ax.set_xlabel('Theoretical $\\varepsilon$')
    ax.set_ylim(-0.25, 10.0)
    ax.set_yticks(np.arange(11))

# both panels share the same series, so the legend entries of the last panel are used
h,l = ax.get_legend_handles_labels()
fig.legend(h, l, loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.05))

fig.set_size_inches(10, 5)
fig.savefig(f'{img_dir}/feat_types_privbayes.pdf', bbox_inches='tight')

### MST White-box Improvements

In [None]:
# Two panels (one per MST implementation) comparing the white-box attack
# with the `wb_vals` and the `wb_errors+sum` features.
fig, axs = plt.subplots(1, 2)

results = pd.read_csv('results/cv_shuffle_results/feat_types_epses/results_agg.csv')

for model_name, model_name_label, ax in zip(['NIST_MST', 'MST'], ['MST (NIST)', 'MST (Smartnoise)'], axs.flat):
    resultss = [results[(results['model'] == model_name) & (results['attack_type'] == attack_type)] for attack_type in ['wb_vals', 'wb_errors+sum']]
    # raw strings: `\m` is LaTeX, not a Python escape sequence
    plot_barv(ax, resultss, [r'$\mathcal{F}_{naive}$', r'$\mathcal{F}_{error}$'], upper_bound=None, title=model_name_label, xticklabels=['$1.0$', '$2.0$', '$4.0$', '$10.0$'])

    ax.set_xlabel('Theoretical $\\varepsilon$')
    ax.set_ylim(-0.25, 10.0)
    ax.set_yticks(np.arange(11))

# both panels share the same series, so the legend entries of the last panel are used
h,l = ax.get_legend_handles_labels()
fig.legend(h, l, loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.05))

fig.set_size_inches(10, 5)
fig.savefig(f'{img_dir}/feat_types_mst.pdf', bbox_inches='tight')

## Compare worst-case datasets
- white-box attacks

In [None]:
# White-box attack on the four worst-case dataset variants; one bar series
# per variant, theoretical epsilon drawn at 4.0.
results = pd.read_csv('results/cv_shuffle_results/test_worst_case/results_agg.csv')

wb_rows = results[results['attack_type'] == 'wb']
resultss = []
for dataset_type in ['worstcase', 'worstcase_narrow', 'worstcase_repeat', 'worstcase_narrow_repeat']:
    resultss.append(wb_rows[wb_rows['dataset_type'] == dataset_type])

fig, ax = plt.subplots()
plot_barv(ax, resultss, ['small', 'small+narrow', 'small+repeat', 'small+narrow+repeat'], upper_bound=4.0)

# zoom in: plot_barv sets the y axis up to 30 by default
ax.set_ylim(-0.25, 6)

handles, legend_labels = ax.get_legend_handles_labels()
fig.legend(handles, legend_labels, loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.05))

fig.set_size_inches(10, 5)
fig.savefig(f'{img_dir}/compare_worstcase_datasets_wb.pdf', bbox_inches='tight')