# Plot model performance and fairness graphs
## Imports

In [1]:
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt

from lib.utils import *

# Display names of the CelebA attributes. Entry i labels attribute index i of the
# stat arrays; the names are used as subplot titles and as printed section headers.
celeba_attributes = ['Attractive', 'High_Cheekbones', 'Mouth_Slightly_Open', 'Smiling']

def resolve_binary_stat(attr, stat, length):
    """Convert per-epoch confusion counts of one attribute into accuracy/fairness curves.

    Args:
        attr: index of the attribute along the second axis of ``stat``.
        stat: array indexed as ``stat[epoch, attribute, k]`` with ``k`` in 0..7.
            Entries 0-3 are TP/FP/FN/TN of the first group and entries 4-7 are
            TP/FP/FN/TN of the second group (male / female per the original note).
        length: number of leading epochs to process.

    Returns:
        A tuple of five lists, each holding ``length`` values: first-group accuracy,
        second-group accuracy, overall accuracy, equality of opportunity (absolute
        TPR gap) and equalized odds (absolute TPR gap + absolute TNR gap).
    """
    acc_first, acc_second, acc_overall = [], [], []
    eq_opportunity, eq_odds = [], []
    for epoch in range(length):
        a_tp, a_fp, a_fn, a_tn, b_tp, b_fp, b_fn, b_tn = (
            stat[epoch, attr, k] for k in range(8)
        )
        # correct predictions and sample counts per group
        a_hits, a_count = a_tp + a_tn, a_tp + a_fp + a_fn + a_tn
        b_hits, b_count = b_tp + b_tn, b_tp + b_fp + b_fn + b_tn
        # gaps between the two groups' true-positive and true-negative rates
        tpr_gap = abs(a_tp / (a_tp + a_fn) - b_tp / (b_tp + b_fn))
        tnr_gap = abs(a_tn / (a_tn + a_fp) - b_tn / (b_tn + b_fp))

        acc_first.append(a_hits / a_count)
        acc_second.append(b_hits / b_count)
        acc_overall.append(
            (a_hits + b_tp + b_tn) / (a_count + b_tp + b_fp + b_fn + b_tn)
        )
        eq_opportunity.append(tpr_gap)
        eq_odds.append(tpr_gap + tnr_gap)
    # one list per metric, all for the requested attribute
    return acc_first, acc_second, acc_overall, eq_opportunity, eq_odds



## Binary prediction model
### CelebA

In [10]:
def show_celeba_stat(val_stat, train_stat=np.array([]), length=None, marker=".", markersize=4, save_name='default', root_folder='./eval/celeba'):
    """Plot accuracy and fairness curves for every CelebA attribute and save them as a PNG.

    The figure is a fixed 2x4 grid: the top row shows accuracy and the bottom row shows
    the fairness metrics, one column per attribute (so at most 4 attributes fit).
    When ``train_stat`` is non-empty, training and validation curves are compared;
    otherwise the validation accuracy is broken down per group.

    Args:
        val_stat: validation stats, indexed as (epoch, attribute, 8).
        train_stat: optional training stats with the same layout; empty means "not given".
        length: number of epochs to draw; falls back to ``val_stat.shape[0]`` when falsy.
        marker, markersize: forwarded to every ``plot`` call.
        save_name: file name (without extension) of the saved figure.
        root_folder: output folder, created if missing.
    """
    # make sure the destination exists and build the image path
    out_dir = Path(root_folder)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file = out_dir / f"{save_name}.png"

    # x positions 0..n-1, one per plotted epoch
    n_epochs = length if length else val_stat.shape[0]
    x_axis = np.linspace(0, n_epochs-1, n_epochs)
    line_style = dict(marker=marker, markersize=markersize)
    fig, axs = plt.subplots(2,4, figsize=(18,8))

    def draw(ax, curves, labels, loc):
        # plot the curves in the given order (keeps the colour cycle stable), then add the legend
        handles = tuple(ax.plot(x_axis, curve, **line_style)[0] for curve in curves)
        ax.legend(handles, labels, loc=loc)

    for attr in range(val_stat.shape[1]):  # one column per attribute
        val_g1, val_g2, val_total, val_eqopp, val_eqodd = resolve_binary_stat(attr, val_stat, x_axis.shape[0])
        acc_ax, fair_ax = axs[0][attr], axs[1][attr]
        acc_ax.set_title(celeba_attributes[attr])
        acc_ax.set_xlabel('Epochs')
        acc_ax.set_ylabel('Accuracy')
        acc_ax.set_ylim([0.5, 1.0])
        fair_ax.set_xlabel('Epochs')
        fair_ax.set_ylabel('Fairness, (lower the better)')
        fair_ax.set_ylim([0.0, 1.0])
        if len(train_stat):
            # training vs. validation comparison
            _, _, train_total, train_eqopp, train_eqodd = resolve_binary_stat(attr, train_stat, x_axis.shape[0])
            draw(acc_ax, (train_total, val_total), ('Training Acc.', 'Validation Acc.',), 'lower right')
            draw(
                fair_ax,
                (train_eqopp, train_eqodd, val_eqopp, val_eqodd),
                ('Training equality of opportunity', 'Training equalized odds', 'Validation equality of opportunity', 'Validation equalized odds'),
                'upper right',
            )
        else:
            # validation only: per-group accuracy breakdown
            draw(acc_ax, (val_g1, val_g2, val_total), ('Male', 'Female', 'Total'), 'lower right')
            draw(fair_ax, (val_eqopp, val_eqodd), ('Equality of opportunity', 'Equalized odds'), 'upper right')
    fig.tight_layout()
    fig.savefig(out_file,)
    plt.close(fig)

def print_celeba_stat_by_epoch(epoch, val_stat, train_stat=np.array([])):
    """Print the accuracy and fairness numbers of one epoch for every CelebA attribute.

    Args:
        epoch: index of the epoch to report.
        val_stat: validation stats, indexed as (epoch, attribute, 8).
        train_stat: optional training stats with the same layout; when non-empty a
            "Training:" section is printed before the "Validation:" section.
    """
    def report(header, curves):
        # curves is the 5-tuple returned by resolve_binary_stat
        g1_acc, g2_acc, total_acc, eqopp, eqodd = curves
        print(header)
        print(f'    Group 1 Acc.: {g1_acc[epoch]:.4f}')
        print(f'    Group 2 Acc.: {g2_acc[epoch]:.4f}')
        print(f'    Total   Acc.: {total_acc[epoch]:.4f}')
        print(f'        Equality of opportunity: {eqopp[epoch]:.4f}')
        print(f'        Equalized odds: {eqodd[epoch]:.4f}')

    for attr in range(val_stat.shape[1]):  # one section per attribute
        print(f'==== {celeba_attributes[attr]} ====')
        val_curves = resolve_binary_stat(attr, val_stat, val_stat.shape[0])
        if len(train_stat):
            report('Training:', resolve_binary_stat(attr, train_stat, train_stat.shape[0]))
        report('Validation:', val_curves)
        print('')
        

model_ckpt_root = Path('/tmp2/npfe/model_stats')
# draw CelebA stats here
# train_stat = load_stats(f'CelebA_lr_1e_3_train', root_folder=model_ckpt_root/'CelebA_lr_1e_3')
# val_stat = load_stats(f'CelebA_lr_1e_3_val', root_folder=model_ckpt_root/'CelebA_lr_1e_3')

# show_celeba_stat(val_stat, length=22, save_name='CelebA_lr_1e_3', root_folder='./eval_resnet50/celeba')
# show_celeba_stat(val_stat, train_stat, length=22, save_name='CelebA_lr_1e_3_2a', root_folder='./eval_resnet50/celeba')
# print_celeba_stat_by_epoch(20, val_stat, train_stat)

# Every run produces two figures: validation only (`<run>.png`) and training vs.
# validation (`<run>_2a.png`). The stats live in a folder named after the run.
# The CelebA_RMob_1e3 run used to be loaded and plotted twice in a row with identical
# arguments; it is now processed once, which yields the same output files.
for run_name in ('CelebA_RMob_1e3', 'CelebA_RR50_1e3', 'CelebA_TMob_1e3'):
    train_stat = load_stats(f'{run_name}_train', root_folder=model_ckpt_root/run_name)
    val_stat = load_stats(f'{run_name}_val', root_folder=model_ckpt_root/run_name)

    show_celeba_stat(val_stat, length=50, save_name=run_name, root_folder='./eval_debug/celeba')
    show_celeba_stat(val_stat, train_stat, length=50, save_name=f'{run_name}_2a', root_folder='./eval_debug/celeba')

# train_stat / val_stat still hold the last run (CelebA_TMob_1e3) at this point
print_celeba_stat_by_epoch(49, val_stat, train_stat)

==== Attractive ====
Training:
    Group 1 Acc.: 0.8364
    Group 2 Acc.: 0.8226
    Total   Acc.: 0.8284
        Equality of opportunity: 0.2384
        Equalized odds: 0.4788
Validation:
    Group 1 Acc.: 0.7849
    Group 2 Acc.: 0.7808
    Total   Acc.: 0.7826
        Equality of opportunity: 0.3825
        Equalized odds: 0.5340

==== High_Cheekbones ====
Training:
    Group 1 Acc.: 0.8657
    Group 2 Acc.: 0.8923
    Total   Acc.: 0.8812
        Equality of opportunity: 0.1370
        Equalized odds: 0.1522
Validation:
    Group 1 Acc.: 0.8645
    Group 2 Acc.: 0.8920
    Total   Acc.: 0.8803
        Equality of opportunity: 0.1837
        Equalized odds: 0.2165

==== Mouth_Slightly_Open ====
Training:
    Group 1 Acc.: 0.9270
    Group 2 Acc.: 0.9308
    Total   Acc.: 0.9292
        Equality of opportunity: 0.0210
        Equalized odds: 0.0274
Validation:
    Group 1 Acc.: 0.9344
    Group 2 Acc.: 0.9399
    Total   Acc.: 0.9376
        Equality of opportunity: 0.0184
        Eq

### FairFaceAge, UTKFaceAge

In [3]:
def show_agemodel_stat(val_stat, train_stat=np.array([]), length=None, marker=".", markersize=4, save_name='default', root_folder='./eval/agemodel'):
    """Plot accuracy and fairness curves of the age model and save them as a PNG.

    The figure has two stacked panels: accuracy on top and the fairness metrics below.
    Only attribute index 0 of the stats is used. When ``train_stat`` is non-empty,
    training and validation curves are compared; otherwise the validation accuracy is
    broken down per group.

    Args:
        val_stat: validation stats, indexed as (epoch, attribute, 8).
        train_stat: optional training stats with the same layout; empty means "not given".
        length: number of epochs to draw; falls back to ``val_stat.shape[0]`` when falsy.
        marker, markersize: forwarded to every ``plot`` call.
        save_name: file name (without extension) of the saved figure.
        root_folder: output folder, created if missing.
    """
    # make sure the destination exists and build the image path
    out_dir = Path(root_folder)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file = out_dir / f"{save_name}.png"

    # x positions 0..n-1, one per plotted epoch
    n_epochs = length if length else val_stat.shape[0]
    x_axis = np.linspace(0, n_epochs-1, n_epochs)
    line_style = dict(marker=marker, markersize=markersize)
    fig, axs = plt.subplots(2,1, figsize=(5,8))
    acc_ax, fair_ax = axs[0], axs[1]

    def draw(ax, curves, labels, loc):
        # plot the curves in the given order (keeps the colour cycle stable), then add the legend
        handles = tuple(ax.plot(x_axis, curve, **line_style)[0] for curve in curves)
        ax.legend(handles, labels, loc=loc)

    val_g1, val_g2, val_total, val_eqopp, val_eqodd = resolve_binary_stat(0, val_stat, x_axis.shape[0])
    acc_ax.set_title('Is age greater than 30?')
    acc_ax.set_xlabel('Epochs')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.set_ylim([0.5, 1.0])
    fair_ax.set_xlabel('Epochs')
    fair_ax.set_ylabel('Fairness, (lower the better)')
    fair_ax.set_ylim([0.0, 1.0])
    if len(train_stat):
        # training vs. validation comparison
        _, _, train_total, train_eqopp, train_eqodd = resolve_binary_stat(0, train_stat, x_axis.shape[0])
        draw(acc_ax, (train_total, val_total), ('Training Acc.', 'Validation Acc.',), 'lower right')
        draw(
            fair_ax,
            (train_eqopp, train_eqodd, val_eqopp, val_eqodd),
            ('Training equality of opportunity', 'Training equalized odds', 'Validation equality of opportunity', 'Validation equalized odds'),
            'upper right',
        )
    else:
        # validation only: per-group accuracy breakdown
        draw(acc_ax, (val_g1, val_g2, val_total), ('Male', 'Female', 'Total'), 'lower right')
        draw(fair_ax, (val_eqopp, val_eqodd), ('Equality of opportunity', 'Equalized odds'), 'upper right')
    fig.tight_layout()
    fig.savefig(out_file,)
    plt.close(fig)

def print_agemodel_stat_by_epoch(epoch, val_stat, train_stat=np.array([])):
    """Print the age model's accuracy and fairness numbers for one epoch.

    Args:
        epoch: index of the epoch to report.
        val_stat: validation stats, indexed as (epoch, attribute, 8); only attribute 0 is read.
        train_stat: optional training stats with the same layout; when non-empty a
            "Training:" section is printed before the "Validation:" section.
    """
    def report(header, curves):
        # curves is the 5-tuple returned by resolve_binary_stat
        g1_acc, g2_acc, total_acc, eqopp, eqodd = curves
        print(header)
        print(f'    Group 1 Acc.: {g1_acc[epoch]:.4f}')
        print(f'    Group 2 Acc.: {g2_acc[epoch]:.4f}')
        print(f'    Total   Acc.: {total_acc[epoch]:.4f}')
        print(f'        Equality of opportunity: {eqopp[epoch]:.4f}')
        print(f'        Equalized odds: {eqodd[epoch]:.4f}')

    print('==== Is age greater than 30? ====')
    val_curves = resolve_binary_stat(0, val_stat, val_stat.shape[0])
    if len(train_stat):
        report('Training:', resolve_binary_stat(0, train_stat, train_stat.shape[0]))
    report('Validation:', val_curves)
    print('')


model_ckpt_root = Path('/tmp2/npfe/model_stats')
# draw FairFaceAge and UTKFaceAge stats here
# each entry: (run name, number of epochs plotted, output folder, epoch to print)
age_runs = (
    ('FairFaceAge_lr_1e_3', 35, './eval_resnet50/fairfaceage', 25),
    ('UTKFaceAge_lr_5e_4', 30, './eval_resnet50/utkfaceage', 20),
)
for run_name, n_epochs, out_dir, report_epoch in age_runs:
    # the stats of a run are stored in a folder named after the run
    train_stat = load_stats(f'{run_name}_train', root_folder=model_ckpt_root/run_name)
    val_stat = load_stats(f'{run_name}_val', root_folder=model_ckpt_root/run_name)

    # validation-only figure, then training vs. validation figure, then the numbers
    show_agemodel_stat(val_stat, length=n_epochs, save_name=run_name, root_folder=out_dir)
    show_agemodel_stat(val_stat, train_stat, length=n_epochs, save_name=f'{run_name}_2a', root_folder=out_dir)
    print_agemodel_stat_by_epoch(report_epoch, val_stat, train_stat)
    

==== Is age greater than 30? ====
Training:
    Group 1 Acc.: 0.8037
    Group 2 Acc.: 0.8206
    Total   Acc.: 0.8126
        Equality of opportunity: 0.1484
        Equalized odds: 0.2251
Validation:
    Group 1 Acc.: 0.8012
    Group 2 Acc.: 0.8179
    Total   Acc.: 0.8100
        Equality of opportunity: 0.1561
        Equalized odds: 0.2288

==== Is age greater than 30? ====
Training:
    Group 1 Acc.: 0.8346
    Group 2 Acc.: 0.8554
    Total   Acc.: 0.8455
        Equality of opportunity: 0.1338
        Equalized odds: 0.2154
Validation:
    Group 1 Acc.: 0.8324
    Group 2 Acc.: 0.8558
    Total   Acc.: 0.8446
        Equality of opportunity: 0.1388
        Equalized odds: 0.2211

