In [4]:
import numpy as np
import pandas as pd
pd.set_option("display.precision", 2)
import seaborn as sns
import sklearn.metrics
import matplotlib.pyplot as plt
import os
from IPython.core.display import HTML

In [5]:
# Lookup tables mapping the identifiers used in this notebook to human-readable strings.

# Approach identifier -> title. Only 'gmm-discrete' is enabled; the other
# approaches are kept as commented-out entries.
title_approaches = {
#     'baseline':'Naive',
    # 'fsn':'Fair Score',
#     'faircal':'FairCal (Ours)',
    # 'oracle':'Oracle (Ours)',
    'gmm-discrete':'GMM-Discrete (Ours)'
    }
# Calibration method identifier -> title.
title_calibration_methods = {
    'beta': 'Beta Calibration'
}
# Feature extractor identifier -> title.
title_features = {
    'facenet':'FaceNet (VGGFace2)',
    'facenet-webface':'FaceNet (Webface)',
    'arcface': 'ArcFace'}
# Summary statistic identifier -> short title.
title_metrics = {
    'mean': 'Mean',
    'aad': 'AAD',
    'mad': 'MAD',
    'std': 'STD'}
# Summary statistic identifier -> long form that spells out the abbreviation.
caption_metrics = {
     'mean': 'Mean',
     'aad': 'AAD (Average Absolute Deviation)',
     'mad': 'MAD (Maximum Absolute Deviation)',
     'std': 'STD (Standard Deviation)',
}
# Approach identifier -> title; same active entry as `title_approaches`, with a
# different set of commented-out alternatives.
title_keys = {
#     'baseline': 'Baseline',
    # 'agenda': 'AGENDA',
    # 'ftc': 'FTC',
    # 'fsn': 'FSN',
#     'faircal': 'FairCal (Ours)',
    # 'oracle': 'Oracle (Ours)',
    'gmm-discrete': 'GMM-Discrete (Ours)'
    }
# Subgroup name -> abbreviation. Covers full names ('African', ...), combined
# subgroup names ('asian_females', ...), 'Global', and single-letter codes ('B', 'F', ...).
# NOTE(review): the single-letter codes abbreviate differently from the full names
# ('W' -> 'C' vs 'Caucasian' -> 'Ca', 'I' -> 'I' vs 'Indian' -> 'In') -- confirm this is intended.
header_titles = {
    'African': 'Af',
    'Asian': 'As',
    'Caucasian': 'Ca',
    'Indian': 'In',
    'asian_females': 'AsF',
    'asian_males': 'AsM',
    'black_females': 'AfF',
    'black_males': 'AfM',
    'indian_females': 'IF',
    'indian_males': 'IM',
    'white_females': 'CF',
    'white_males': 'CM',
    'Global': 'Gl',
    'B': 'Af',
    'A': 'As',
    'W': 'C',
    'I': 'I',
    'F': 'F',
    'M': 'M'
}
# Dataset identifier -> title.
title_datasets = {
    'rfw': 'RFW',
    'bfw': 'BFW'
}
# Calibration method identifier -> lower-case name.
caption_calibration_methods = {
    'beta': 'beta calibration'
}
# Measure identifier -> name.
caption_measures = {
    'ks': 'KS'
}
# Feature extractor identifiers per dataset. Each identifier is expected to be a
# key of `title_features` (presumably also an experiment directory name -- TODO confirm).
features_datasets = {
    'rfw': ['facenet', 'facenet-webface'],
    # Was 'arcnet', which is not a key of `title_features`; the BFW feature used
    # elsewhere in this notebook is 'arcface'.
    'bfw': ['facenet-webface', 'arcface']
}
# Name of a sensitive attribute per dataset.
attributes_datasets = {
    'rfw': 'ethnicity',
    'bfw': 'att',
}

In [6]:
def load_measures(dataset, feature, approach, subgroups, att, measure, calibration_method, nbins, n_clusters):
    """Load one per-subgroup measure for each of the five folds of a saved experiment.

    The results are read from
    ``../experiments/{dataset}/{feature}/{approach}/{calibration_method}/nbins_{nbins}<suffix>.npy``
    where ``<suffix>`` is ``_nclusters_{n_clusters}`` for the 'faircal' and
    'gmm-discrete' approaches, ``_nclusters_{n_clusters}_fpr_1e-03`` for 'fsn',
    and empty otherwise.

    Parameters
    ----------
    dataset, feature, approach, calibration_method : str
        Path components of the results file.
    subgroups : dict
        Maps an attribute name to the list of its subgroup names.
    att : str
        Attribute whose subgroups are loaded (a key of ``subgroups``).
    measure : str
        Key of the measure inside each fold's results (e.g. 'ks').
    nbins, n_clusters : int
        Values embedded in the file name.

    Returns
    -------
    pandas.DataFrame
        Rows 'fold1'..'fold5' (index named 'folds'), one column per subgroup of ``att``.

    Raises
    ------
    FileNotFoundError
        If the results file does not exist.
    KeyError
        If a fold, measure, attribute or subgroup is missing from the results.
    """
    filename = f'../experiments/{dataset}/{feature}/{approach}/{calibration_method}/nbins_{nbins}'
    if approach in ('faircal', 'gmm-discrete'):
        filename += f'_nclusters_{n_clusters}'
    elif approach == 'fsn':
        filename += f'_nclusters_{n_clusters}_fpr_1e-03'

    # The file stores a pickled dict inside a 0-d object array, hence allow_pickle
    # and .item(). Unpickling can execute arbitrary code: only load trusted files.
    results = np.load(f'{filename}.npy', allow_pickle=True).item()

    fold_names = [f'fold{fold}' for fold in range(1, 6)]
    # Build every column up front instead of enlarging the frame one cell at a time.
    columns = {
        f'{subgroup}': [results[fold_name][measure][att][subgroup] for fold_name in fold_names]
        for subgroup in subgroups[att]
    }
    return pd.DataFrame(columns, index=pd.Index(fold_names, name='folds'))

In [7]:
# Quick check of `load_measures`: per-fold 'ks' values for the subgroups of
# attribute 'e' (BFW, 'facenet-webface' features, 'gmm-discrete' approach).
subgroups = dict(
    e=['B', 'A', 'W', 'I'],
    g=['F', 'M'],
    att=[
        'black_females', 'black_males',
        'asian_females', 'asian_males',
        'white_females', 'white_males',
        'indian_females', 'indian_males',
    ],
)
att = 'e'
test = load_measures(
    dataset='bfw',
    feature='facenet-webface',
    approach='gmm-discrete',
    subgroups=subgroups,
    att=att,
    measure='ks',
    calibration_method='beta',
    nbins=25,
    n_clusters=100,
)
print(test)

          B     A         W     I
folds                            
fold1  0.01  0.02  2.71e-02  0.01
fold2  0.02  0.02  1.67e-02  0.02
fold3  0.03  0.02  8.46e-03  0.05
fold4  0.02  0.04  2.69e-02  0.01
fold5  0.02  0.03  2.12e-02  0.03


In [8]:
def get_sensitive_attributes_subgroups(dataset):
    """Return the sensitive attributes of a dataset and the subgroups of each one.

    Parameters
    ----------
    dataset : str
        'rfw', or any name containing 'bfw'.

    Returns
    -------
    tuple[list[str], dict[str, list[str]]]
        The attribute names, and a dict mapping each attribute name to its
        list of subgroup names.

    Raises
    ------
    ValueError
        If ``dataset`` is neither 'rfw' nor a name containing 'bfw'.
    """
    if dataset == 'rfw':
        sensitive_attributes = ['ethnicity']
        subgroups = {'ethnicity':['African', 'Asian', 'Caucasian', 'Indian']}
    elif 'bfw' in dataset:
        sensitive_attributes = ['e', 'g', 'att']
        subgroups = {
            'e':['B', 'A', 'W', 'I'],
            'g':['F','M'],
            'att': ['black_females', 'black_males', 'asian_females', 'asian_males', 'white_females', 'white_males', 'indian_females', 'indian_males']
        }
    else:
        # Without this branch the return below fails with an opaque UnboundLocalError.
        raise ValueError(
            f"Unknown dataset {dataset!r}: expected 'rfw' or a name containing 'bfw'"
        )
    return sensitive_attributes, subgroups

In [22]:
# Bias summary table: for every (dataset, feature, approach), pool the per-fold,
# per-subgroup values of `measure` and report their mean, average absolute
# deviation (aad), maximum absolute deviation (mad) and standard deviation (std).
ks = np.array([5,10,15,20,25,50,75,100])
folds = [1,2,3,4,5]

measure = 'ks'
calibration = 'beta'

indices = {
    'rfw' : {
        'facenet': ['African', 'Asian', 'Caucasian', 'Indian'],
        'facenet-webface': ['African', 'Asian', 'Caucasian', 'Indian'],
    },
    'bfw' : {
        'facenet-webface': ['B', 'A', 'W', 'I', 'F','M', 
            'black_females', 'black_males', 'asian_females', 'asian_males', 'white_females', 'white_males', 'indian_females', 'indian_males'],
        'arcface': ['B', 'A', 'W', 'I', 'F','M', 
            'black_females', 'black_males', 'asian_females', 'asian_males', 'white_females', 'white_males', 'indian_females', 'indian_males']
        }
    }

# One row per (dataset, feature, approach) combination.
approaches = ['gmm-discrete']
tuples = [
    (dataset, feature, approach)
    for dataset in indices
    for feature in indices[dataset]
    for approach in approaches
]
index = pd.MultiIndex.from_tuples(tuples, names=['dataset', 'feature', 'approach'])

data = pd.DataFrame(np.nan, index=index, columns=['mean', 'aad', 'mad', 'std'])

# For now, because we only have one experiment
for dataset in indices:
    sensitive_attributes, subgroups = get_sensitive_attributes_subgroups(dataset)
    nbins = 25 if dataset == 'bfw' else 10
    for feature in indices[dataset]:
        for approach in approaches:
            row = (dataset, feature, approach)
            # NOTE(review): every attribute writes to the same row, so for datasets with
            # several attributes only the last one ends up in the table. Confirm whether
            # the values of all attributes should be pooled instead.
            for att in sensitive_attributes:
                data_work = load_measures(dataset, feature, approach, subgroups, att, measure, calibration, nbins=nbins, n_clusters=100)
                data_work = data_work * 100  # presumably converts to percent

                # Pool all folds of all subgroups into one flat array, column by column.
                all_subgroups = np.hstack([data_work[subgroup].to_numpy() for subgroup in data_work.columns])
                group_mean = all_subgroups.mean()
                abs_deviation = np.abs(all_subgroups - group_mean)

                # Single .loc[row, column] assignment. The chained form
                # data.loc[row][column] = value writes to a temporary Series and is
                # silently lost under pandas Copy-on-Write.
                data.loc[row, 'mean'] = group_mean
                data.loc[row, 'aad'] = abs_deviation.mean()
                data.loc[row, 'mad'] = abs_deviation.max()
                data.loc[row, 'std'] = np.std(all_subgroups)

HTML(data.to_html())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,aad,mad,std
dataset,feature,approach,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
rfw,facenet,gmm-discrete,1.7,0.65,2.06,0.77
rfw,facenet-webface,gmm-discrete,1.77,0.57,2.01,0.72
bfw,facenet-webface,gmm-discrete,3.21,1.29,3.51,1.5
bfw,arcface,gmm-discrete,1.97,1.08,4.82,1.38


In [11]:
def get_overall_stats(calibration_method, nbins, dataset,feature,approach,att,n_clusters,fpr_def):
    """Compute per-fold AUC and interpolated TPR values from a saved experiment.

    Reads ``../experiments/{dataset}/{feature}/{approach}/{calibration_method}/nbins_{nbins}<suffix>.npy``
    (suffix ``_nclusters_{n_clusters}`` for 'faircal'/'gmm-discrete',
    ``_nclusters_{n_clusters}_fpr_1e-03`` for 'fsn') and, for each of the five
    folds, takes the 'Global' fpr/tpr curves of attribute ``att``.

    The curves stored under 'calibration' are used for the 'faircal', 'baseline'
    and 'gmm-discrete' approaches; 'pre_calibration' is used for every other approach.

    Returns a DataFrame indexed by 'fold1'..'fold5' (index named 'folds') with
    columns 'auc', 'fpr_1e-3' and 'fpr_1e-2'. The last two hold the tpr
    interpolated at ``fpr_def``, which is expected to provide two values in that order.
    """
    path_parts = [f'../experiments/{dataset}/{feature}/{approach}/{calibration_method}/nbins_{nbins}']
    if approach in ('faircal', 'gmm-discrete'):
        path_parts.append(f'_nclusters_{n_clusters}')
    if approach == 'fsn':
        path_parts.append(f'_nclusters_{n_clusters}_fpr_1e-03')
    path_parts.append('.npy')

    score_kind = 'pre_calibration'
    if approach in ['faircal', 'baseline', 'gmm-discrete']:
        score_kind = 'calibration'

    # Pickled dict stored in a 0-d object array; only load trusted result files.
    results = np.load(''.join(path_parts), allow_pickle=True).item()

    fold_labels = [f'fold{number}' for number in range(1, 6)]
    tpr_columns = ['fpr_1e-3', 'fpr_1e-2']
    stats = pd.DataFrame(
        np.nan,
        index=pd.Index(fold_labels, name='folds'),
        columns=['auc'] + tpr_columns,
    )

    for label in fold_labels:
        fpr = results[label]['fpr'][att]['Global'][score_kind]
        tpr = results[label]['tpr'][att]['Global'][score_kind]
        stats.loc[label, 'auc'] = sklearn.metrics.auc(fpr, tpr)
        # tpr read off the curve at the requested false-positive rates
        stats.loc[label, tpr_columns] = np.interp(fpr_def, fpr, tpr)
    return stats

In [13]:
# Accuracy table
keys = ['gmm-discrete']
error = [1e-3, 1e-2]
title_stat = ['AUROC', '0.1\% FPR', '1\% FPR']
n_clusters = 100
calibration = 'beta'
datasets = ['bfw', 'rfw']

indices = {
    'rfw' : ['facenet', 'facenet-webface'],
    'bfw' : ['facenet-webface', 'arcface']
}
approaches = ['gmm-discrete']

tuples = []
for dataset in indices:
    for feature in indices[dataset]:
        for approach in approaches:
            tuples.append((dataset, feature, approach))

index = pd.MultiIndex.from_tuples(tuples, names=['dataset', 'feature', 'approach'])
data = pd.DataFrame(index=index)
metrics = ['auc', 'fpr_1e-3', 'fpr_1e-2']
for metric in metrics:
    data[metric] = ''

for dataset in indices:
    for feature in indices[dataset]:
        for approach in approaches:
            nbins = 25 if dataset == 'bfw' else 10
            att = 'att' if dataset == 'bfw' else 'ethnicity'
            data_work = get_overall_stats(calibration, nbins, dataset, feature, approach, att ,n_clusters,error)
            data_work *= 100
            for metric in metrics:
                mean = round(data_work[metric].mean(), 2)
                std = round(data_work[metric].std(), 2)
                data.loc[dataset, feature, approach][metric] = f'{str(mean)} ({str(std)})'
HTML(data.to_html())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,auc,fpr_1e-3,fpr_1e-2
dataset,feature,approach,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
rfw,facenet,gmm-discrete,92.31 (0.38),26.69 (6.23),50.44 (4.69)
rfw,facenet-webface,gmm-discrete,87.26 (0.55),19.23 (5.21),34.52 (2.52)
bfw,facenet-webface,gmm-discrete,95.73 (0.16),38.86 (0.86),60.93 (1.12)
bfw,arcface,gmm-discrete,97.37 (0.37),84.79 (1.25),89.56 (1.01)


In [25]:
def get_overall_stats_temp(calibration_method, nbins, dataset,feature,approach,att,n_clusters,fpr_def, new):
    """Variant of the overall-stats loader that reads a tagged results file.

    Same computation as ``get_overall_stats``, except that for the 'faircal' and
    'gmm-discrete' approaches the file name ends in
    ``_nclusters_{n_clusters} ({new}).npy``, so differently tagged result files
    (e.g. 'new' / 'old') can be compared.

    Returns a DataFrame indexed by 'fold1'..'fold5' (index named 'folds') with
    columns 'auc', 'fpr_1e-3' and 'fpr_1e-2'. The last two hold the 'Global' tpr
    of attribute ``att`` interpolated at ``fpr_def``.
    """
    if approach in ('faircal', 'gmm-discrete'):
        suffix = f'_nclusters_{n_clusters} ({new})'
    elif approach == 'fsn':
        suffix = f'_nclusters_{n_clusters}_fpr_1e-03'
    else:
        suffix = ''
    filename = f'../experiments/{dataset}/{feature}/{approach}/{calibration_method}/nbins_{nbins}' + suffix

    uses_calibrated = approach in ['faircal', 'baseline', 'gmm-discrete']
    key = 'calibration' if uses_calibrated else 'pre_calibration'

    # Pickled dict stored in a 0-d object array; only load trusted result files.
    results = np.load(f'{filename}.npy', allow_pickle=True).item()

    table = pd.DataFrame({'folds': [f'fold{number}' for number in range(1, 6)]})
    for column in ('auc', 'fpr_1e-3', 'fpr_1e-2'):
        table[column] = np.nan
    table = table.set_index('folds')

    for position, fold_name in enumerate(table.index):
        fold_results = results[fold_name]
        fpr = fold_results['fpr'][att]['Global'][key]
        tpr = fold_results['tpr'][att]['Global'][key]
        table.loc[fold_name, 'auc'] = sklearn.metrics.auc(fpr,tpr)
        # Every column after 'auc' receives the interpolated tpr values.
        table.iloc[position, 1:] = np.interp(fpr_def, fpr, tpr)
    return table

# Accuracy table comparing two tagged result files ('new' vs 'old') of one
# experiment. `get_overall_stats_temp` reads
# '..._nclusters_{n_clusters} (new).npy' and '..._nclusters_{n_clusters} (old).npy',
# so both files must exist for this cell to run.
keys = ['baseline', 'faircal', 'gmm-discrete']
error = [1e-3, 1e-2]
# Raw strings: '\%' is an invalid escape sequence in a normal string literal.
title_stat = ['AUROC', r'0.1\% FPR', r'1\% FPR']
n_clusters = 100
calibration = 'beta'

version = ['new', 'old']
data = pd.DataFrame(index=version)
metrics = ['auc', 'fpr_1e-3', 'fpr_1e-2']
for metric in metrics:
    data[metric] = ''

dataset = 'bfw'
feature = 'facenet-webface'
approach = 'faircal'

for current in version:
    nbins = 25 if dataset == 'bfw' else 10
    att = 'att' if dataset == 'bfw' else 'ethnicity'
    data_work = get_overall_stats_temp(calibration, nbins, dataset, feature, approach, att ,n_clusters,error, current)
    data_work *= 100  # presumably converts to percent
    for metric in metrics:
        mean = round(data_work[metric].mean(), 2)
        std = round(data_work[metric].std(), 2)
        # Single .loc[row, column] assignment; the chained form
        # data.loc[current][metric] = ... is silently lost under pandas Copy-on-Write.
        data.loc[current, metric] = f'{str(mean)} ({str(std)})'

print(data)

FileNotFoundError: [Errno 2] No such file or directory: '../experiments/bfw/facenet-webface/faircal/beta/nbins_25_nclusters_100 (new).npy'