# Get the graph for adversarial attack performance and fairness
## Imports

In [1]:
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt

from lib.utils import *

# Display names of the evaluated attributes. Entry i labels attribute index i
# (axis 1 of the stat arrays); it is used for subplot titles and for the
# section headers of the printed reports.
celeba_attributes = ['Attractive', 'High_Cheekbones', 'Mouth_Slightly_Open', 'Smiling']

def resolve_binary_stat(attr, stat, length):
    """Turn raw confusion counts into accuracy and fairness series for one attribute.

    `stat` is read as stat[epoch, attr, k] with k in 0..7.  Entries 0-3 are
    the TP, FP, FN, TN counts of group 1 (male) and entries 4-7 the same
    four counts of group 2 (female).

    Returns a 5-tuple of lists, each holding one value for every one of the
    first `length` epochs:
        group-1 accuracy, group-2 accuracy, overall accuracy,
        equality of opportunity (absolute TPR gap between the groups),
        equalized odds (absolute TPR gap + absolute TNR gap).
    """
    acc_male, acc_female, acc_overall = [], [], []
    eq_opportunity, eq_odds = [], []
    for epoch in range(length):
        m_tp, m_fp, m_fn, m_tn = (stat[epoch, attr, k] for k in range(0, 4))
        f_tp, f_fp, f_fn, f_tn = (stat[epoch, attr, k] for k in range(4, 8))

        # accuracies: correct predictions over all samples
        m_hits = m_tp + m_tn
        m_count = m_tp + m_fp + m_fn + m_tn
        acc_male.append(m_hits / m_count)
        acc_female.append((f_tp + f_tn) / (f_tp + f_fp + f_fn + f_tn))
        acc_overall.append((m_hits + f_tp + f_tn) / (m_count + f_tp + f_fp + f_fn + f_tn))

        # per-group true positive / true negative rates
        m_tpr = m_tp / (m_tp + m_fn)
        m_tnr = m_tn / (m_tn + m_fp)
        f_tpr = f_tp / (f_tp + f_fn)
        f_tnr = f_tn / (f_tn + f_fp)

        # fairness gaps between the two groups
        tpr_gap = abs(m_tpr - f_tpr)
        tnr_gap = abs(m_tnr - f_tnr)
        eq_opportunity.append(tpr_gap)
        eq_odds.append(tpr_gap + tnr_gap)
    # one list per statistic, all for the same attribute
    return acc_male, acc_female, acc_overall, eq_opportunity, eq_odds

def resolve_binary_performance(attr, stat, length, fairness_matrix='equality of opportunity'):
    """Score the selected epochs of one attribute relative to the raw (first) entry.

    For every one of the first `length` entries of `stat` the score is
        (total accuracy - raw accuracy) + (raw fairness gap - fairness gap)
    where "raw" is entry 0.  Entry 0 therefore scores 0, and a larger score
    means a better accuracy/fairness trade-off than the raw performance.

    Args:
        attr: index of the attribute inside `stat`.
        stat: confusion counts, forwarded unchanged to `resolve_binary_stat`.
        length: number of leading entries (epochs) to score.
        fairness_matrix: which fairness gap to combine with accuracy, either
            'equality of opportunity' or 'equalized odds'.

    Returns:
        A list with `length` scores (empty when `length` selects nothing).

    Raises:
        ValueError: if `fairness_matrix` is not one of the supported names.
    """
    # Validate before doing any work.  A real exception is used instead of
    # `assert False` because asserts are removed under `python -O`, which
    # would make an unsupported name silently return None.
    if fairness_matrix not in ('equality of opportunity', 'equalized odds'):
        raise ValueError(f'the fairness matrix {fairness_matrix} not supported.')

    _, _, total_acc_list, equality_of_opportunity_list, equalized_odds_list = resolve_binary_stat(attr, stat, length)
    if fairness_matrix == 'equality of opportunity':
        fairness_list = equality_of_opportunity_list
    else:
        fairness_list = equalized_odds_list

    # nothing selected -> nothing to score (there is no raw entry to compare to)
    if not total_acc_list:
        return []

    # the first element in each list is the raw performance
    init_acc, init_fairness = total_acc_list[0], fairness_list[0]
    # linear combination of accuracy gain and fairness-gap reduction
    return [
        (acc - init_acc) + (init_fairness - gap)
        for acc, gap in zip(total_acc_list, fairness_list)
    ]

## Binary prediction model
### CelebA
Plot the accuracy and fairness statistics under adversarial attack, in the same way as we did during model training.

In [2]:
def show_celeba_stat(val_stat, train_stat=np.array([]), length=None, marker=".", markersize=4, save_name='default', root_folder='./eval/celeba'):
    """Plot per-epoch accuracy and fairness curves for every attribute and save them as a PNG.

    The figure is a 2x4 grid; column i belongs to attribute i.  The top row
    shows accuracy, the bottom row the two fairness gaps (equality of
    opportunity and equalized odds, lower is better).

    * With a non-empty `train_stat`, training and validation curves are
      drawn side by side (total accuracy only).
    * Otherwise only validation is drawn, with accuracy split into
      male / female / total.

    `length` restricts the plot to the first `length` epochs; when it is
    falsy every epoch of `val_stat` is used.  The image is written to
    <root_folder>/<save_name>.png and the folder is created if needed.
    """
    # resolve the output file path
    out_dir = Path(root_folder)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file = out_dir / f"{save_name}.png"

    # CelebA stats are in shape (N, attributes, 8); one x position per epoch
    n_points = length if length else val_stat.shape[0]
    x_axis = np.linspace(0, n_points-1, n_points)
    n_epochs = x_axis.shape[0]

    def draw(ax, series):
        # draw one curve over the epochs and hand back its line handle
        (line,) = ax.plot(x_axis, series, marker=marker, markersize=markersize)
        return line

    fig, axs = plt.subplots(2, 4, figsize=(18, 8))
    for attr in range(val_stat.shape[1]):  # one column per attribute
        v_g1_acc, v_g2_acc, v_total_acc, v_eqopp, v_eqodd = resolve_binary_stat(attr, val_stat, n_epochs)
        acc_ax, fair_ax = axs[0][attr], axs[1][attr]

        acc_ax.set_title(celeba_attributes[attr])
        acc_ax.set_xlabel('Epochs')
        acc_ax.set_ylabel('Accuracy')
        acc_ax.set_ylim([0.5, 1.0])
        fair_ax.set_xlabel('Epochs')
        fair_ax.set_ylabel('Fairness, (lower the better)')
        fair_ax.set_ylim([0.0, 1.0])

        if not len(train_stat):
            # validation only: accuracy per group plus the total
            acc_lines = tuple(draw(acc_ax, series) for series in (v_g1_acc, v_g2_acc, v_total_acc))
            acc_ax.legend(acc_lines, ('Male', 'Female', 'Total'), loc='lower right')
            fair_lines = tuple(draw(fair_ax, series) for series in (v_eqopp, v_eqodd))
            fair_ax.legend(fair_lines, ('Equality of opportunity', 'Equalized odds'), loc='upper right')
        else:
            # training against validation
            _, _, t_total_acc, t_eqopp, t_eqodd = resolve_binary_stat(attr, train_stat, n_epochs)
            acc_lines = tuple(draw(acc_ax, series) for series in (t_total_acc, v_total_acc))
            acc_ax.legend(acc_lines, ('Training Acc.', 'Validation Acc.',), loc='lower right')
            fair_lines = tuple(draw(fair_ax, series) for series in (t_eqopp, t_eqodd, v_eqopp, v_eqodd))
            fair_ax.legend(fair_lines, ('Training equality of opportunity', 'Training equalized odds', 'Validation equality of opportunity', 'Validation equalized odds'), loc='upper right')

    fig.tight_layout()
    fig.savefig(out_file,)
    plt.close(fig)

def print_celeba_stat_by_epoch(epoch, val_stat, train_stat=np.array([])):
    """Print the accuracy and fairness numbers of a single epoch, attribute by attribute.

    For every attribute a header line is printed, followed by a "Training"
    section (only when `train_stat` is non-empty) and a "Validation"
    section.  Each section lists group-1, group-2 and total accuracy plus
    the equality-of-opportunity and equalized-odds gaps at index `epoch`.
    """
    def report(header, g1_acc, g2_acc, total_acc, eqopp, eqodd):
        # one section of the report; values are looked up line by line
        print(header)
        print(f'    Group 1 Acc.: {g1_acc[epoch]:.4f}')
        print(f'    Group 2 Acc.: {g2_acc[epoch]:.4f}')
        print(f'    Total   Acc.: {total_acc[epoch]:.4f}')
        print(f'        Equality of opportunity: {eqopp[epoch]:.4f}')
        print(f'        Equalized odds: {eqodd[epoch]:.4f}')

    n_attrs = val_stat.shape[1]
    for attr in range(n_attrs):
        print(f'==== {celeba_attributes[attr]} ====')
        val_series = resolve_binary_stat(attr, val_stat, val_stat.shape[0])
        if len(train_stat):
            train_series = resolve_binary_stat(attr, train_stat, train_stat.shape[0])
            report('Training:', *train_series)
        report('Validation:', *val_series)
        print('')
        


Get the accuracy versus fairness graph and get the best epoch

In [3]:
# Ideally this should read two different stat results, one from each attack run
# with a different fairness metric, but we are not there yet.

# remember to sum up all the attributes in performance
def show_celeba_acc2fairness(val_stat, length=None, marker=".", markersize=4, save_name='default', root_folder='./eval/celeba'):
    """Report the best epoch per fairness metric and plot accuracy against fairness.

    Steps:
      1. Score every epoch with `resolve_binary_performance`, summed over all
         attributes, once per fairness metric.
      2. Pick the best-scoring epoch for each metric, ignoring entry 0 (the
         raw performance), print it together with its detailed statistics,
         and flag the case where even the best epoch scores below zero.
      3. Draw a 2x4 grid (column i = attribute i): total accuracy plotted
         against `1 - gap`, top row for equality of opportunity, bottom row
         for equalized odds, with the best epoch highlighted.

    `length` restricts the analysis to the first `length` epochs (all epochs
    of `val_stat` when falsy).  The image is written to
    <root_folder>/<save_name>.png and the folder is created if needed.
    """
    # resolve the output file path
    out_dir = Path(root_folder)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file = out_dir / f"{save_name}.png"

    # CelebA stats are in shape (N, attributes, 8)
    n_points = length if length else val_stat.shape[0]
    x_axis = np.linspace(0, n_points-1, n_points)
    n_epochs = x_axis.shape[0]
    n_attrs = val_stat.shape[1]

    # accumulate the per-attribute scores into one score per epoch
    eqopp_scores = [0]*n_epochs
    eqodd_scores = [0]*n_epochs
    for attr in range(n_attrs):
        eqopp_part = resolve_binary_performance(attr, val_stat, n_epochs, fairness_matrix='equality of opportunity')
        eqopp_scores = [running + part for running, part in zip(eqopp_scores, eqopp_part)]
        eqodd_part = resolve_binary_performance(attr, val_stat, n_epochs, fairness_matrix='equalized odds')
        eqodd_scores = [running + part for running, part in zip(eqodd_scores, eqodd_part)]

    # best epoch per metric; entry 0 is the raw performance and is excluded
    eqopp_tail, eqodd_tail = eqopp_scores[1:], eqodd_scores[1:]
    best_eqopp_epoch = eqopp_tail.index(max(eqopp_tail)) + 1
    best_eqodd_epoch = eqodd_tail.index(max(eqodd_tail)) + 1

    print(f'Best epoch for equality of opportunity: {best_eqopp_epoch:04d} with score {eqopp_scores[best_eqopp_epoch]:.4f}')
    if max(eqopp_tail) < 0:
        print('[Worse than raw performance]')
    print_celeba_stat_by_epoch(best_eqopp_epoch, val_stat)

    print(f'Best epoch for equalized odds:          {best_eqodd_epoch:04d} with score {eqodd_scores[best_eqodd_epoch]:.4f}')
    if max(eqodd_tail) < 0:
        print('[Worse than raw performance]')
    print_celeba_stat_by_epoch(best_eqodd_epoch, val_stat)

    # accuracy-versus-fairness curves
    fig, axs = plt.subplots(2, 4, figsize=(18, 8))
    for attr in range(n_attrs):
        _, _, acc_curve, eqopp_curve, eqodd_curve = resolve_binary_stat(attr, val_stat, n_epochs)
        top_ax, bottom_ax = axs[0][attr], axs[1][attr]

        top_ax.set_title(celeba_attributes[attr])
        for ax in (top_ax, bottom_ax):
            ax.set_xlabel('Fairness')
            ax.set_ylabel('Accuracy')
            ax.set_box_aspect(1)
            ax.set_xlim([0.5, 1.0])
            ax.set_ylim([0.5, 1.0])

        panels = (
            (top_ax, eqopp_curve, best_eqopp_epoch, 'Equality of opportunity'),
            (bottom_ax, eqodd_curve, best_eqodd_epoch, 'Equalized odds'),
        )
        for ax, gap_curve, best_epoch, label in panels:
            # the x value is 1 - gap, so points further right are fairer
            (curve,) = ax.plot([1.0-gap for gap in gap_curve], acc_curve, marker=marker, markersize=markersize)
            best_dot = ax.scatter([1.0-gap_curve[best_epoch]], [acc_curve[best_epoch]], color='#FF2301')
            ax.legend((curve, best_dot,), (label, 'best epoch',), loc='lower left')

    fig.tight_layout()
    fig.savefig(out_file,)
    plt.close(fig)

## Noise

In [4]:
# Root folder of the saved statistics for the noise experiments; each run's
# subfolder is appended to it and passed to load_stats as root_folder.
advatk_ckpt_root = Path('/tmp2/npfe/noise_stats')

# CelebA direct best model
# val_stat = load_stats(f'CelebADirect_lr_1e_1_val', root_folder=advatk_ckpt_root/'CelebADirect_lr_1e_1')
# show_celeba_acc2fairness(val_stat, save_name='CelebADirect_lr_1e_1_af', root_folder='./eval_resnet50/celeba_noise')
# print_celeba_stat_by_epoch(0, val_stat)

In [5]:
# CelebA BCEMasking best model
# val_stat = load_stats(f'CelebABCEmasking_lr_2e_4_val', root_folder=advatk_ckpt_root/'CelebABCEmasking_lr_2e_4')
# show_celeba_acc2fairness(val_stat, save_name='CelebABCEmasking_lr_2e_4_af', root_folder='./eval_resnet50/celeba_noise')

In [6]:
# CelebA POPtim best model
# val_stat = load_stats(f'CelebAPOptim_lr_1e_3_val', root_folder=advatk_ckpt_root/'CelebAPOptim_lr_1e_3')
# show_celeba_acc2fairness(val_stat, save_name='CelebAPOptim_lr_1e_3_af', root_folder='./eval_resnet50/celeba_noise')

In [7]:

# CelebA POPtim BCE best model
# val_stat = load_stats(f'CelebAPOptim_BCE_lr_2e_3_val', root_folder=advatk_ckpt_root/'CelebAPOptim_BCE_lr_2e_3')
# show_celeba_acc2fairness(val_stat, save_name='CelebAPOptim_BCE_lr_2e_3_af', root_folder='./eval_resnet50/celeba_noise')

In [8]:
# CelebA POPtim POPtim best model
# val_stat = load_stats(f'CelebAPOptim_POptim_lr_1e_3_val', root_folder=advatk_ckpt_root/'CelebAPOptim_POptim_lr_1e_3')
# show_celeba_acc2fairness(val_stat, save_name='CelebAPOptim_POptim_lr_1e_3_af', root_folder='./eval_resnet50/celeba_noise')

## Patch

In [9]:
# Root folder of the saved statistics for the patch experiments; this rebinds
# the name used in the noise section, so the cells below read patch results.
advatk_ckpt_root = Path('/tmp2/npfe/patch_stats')

# val_stat = load_stats(f'CelebADirect_lr_1e0_val', root_folder=advatk_ckpt_root/'CelebADirect_lr_1e0')
# show_celeba_acc2fairness(val_stat, save_name='CelebADirect_lr_1e0_af', root_folder='./eval_resnet50/celeba_patch')

In [10]:
# val_stat = load_stats(f'CelebABCEmasking_lr_1e6_val', root_folder=advatk_ckpt_root/'CelebABCEmasking_lr_1e6')
# show_celeba_acc2fairness(val_stat, save_name='CelebABCEmasking_lr_1e6_af', root_folder='./eval_resnet50/celeba_patch')

In [11]:
# val_stat = load_stats(f'CelebAPOptim_lr_1e4_val', root_folder=advatk_ckpt_root/'CelebAPOptim_lr_1e4')
# show_celeba_acc2fairness(val_stat, save_name='CelebAPOptim_lr_1e4_af', root_folder='./eval_resnet50/celeba_patch')

In [12]:
# Evaluate the CelebAPOptim_BCE patch runs one after another.  The run names
# differ only in their lr_* suffix; each run's validation stats are loaded and
# its accuracy-versus-fairness figure is written next to the others.
for run_name in (
    'CelebAPOptim_BCE_lr_1e6',
    'CelebAPOptim_BCE_lr_1e3',
    'CelebAPOptim_BCE_lr_1e0',
    'CelebAPOptim_BCE_lr_1e_3',
    'CelebAPOptim_BCE_lr_1e_6',
):
    val_stat = load_stats(f'{run_name}_val', root_folder=advatk_ckpt_root/run_name)
    show_celeba_acc2fairness(val_stat, save_name=f'{run_name}_af', root_folder='./eval_resnet50/celeba_patch')

Best epoch for equality of opportunity: 0037 with score 0.0033
==== Attractive ====
Validation:
    Group 1 Acc.: 0.7970
    Group 2 Acc.: 0.7683
    Total   Acc.: 0.7805
        Equality of opportunity: 0.2997
        Equalized odds: 0.4905

==== High_Cheekbones ====
Validation:
    Group 1 Acc.: 0.8475
    Group 2 Acc.: 0.8454
    Total   Acc.: 0.8463
        Equality of opportunity: 0.1102
        Equalized odds: 0.1180

==== Mouth_Slightly_Open ====
Validation:
    Group 1 Acc.: 0.9062
    Group 2 Acc.: 0.8969
    Total   Acc.: 0.9009
        Equality of opportunity: 0.0011
        Equalized odds: 0.0014

==== Smiling ====
Validation:
    Group 1 Acc.: 0.8892
    Group 2 Acc.: 0.8923
    Total   Acc.: 0.8910
        Equality of opportunity: 0.0593
        Equalized odds: 0.0610

Best epoch for equalized odds:          0037 with score 0.0682
==== Attractive ====
Validation:
    Group 1 Acc.: 0.7970
    Group 2 Acc.: 0.7683
    Total   Acc.: 0.7805
        Equality of opportunity: 0.