# Tables for FairCal based on Salvador et al., 2022

### Tables Reproduced

1. Global Accuracy Measures
2. Fairness Calibration
3. Predictive Equality ('fpr at fpr')
4. Equal Opportunity ('fnr at fnr')

### Imports

In [1]:
import numpy as np
import pandas as pd
pd.set_option("display.precision", 2)
import seaborn as sns
import sklearn.metrics
import matplotlib.pyplot as plt
import os

### Data manager that loads the saved experiment results and provides the data for a specific table of interest

In [2]:
class DataManager:
    '''
    Load the saved results of one experiment and expose them as per-fold tables.

    One instance corresponds to a single dataset / feature / approach / objective
    combination. The results file is read once, in the constructor.

    Input:
        dataset (str), e.g. 'rfw'
        feature (str), e.g. 'facenet'
        approach (str), e.g. 'faircal'
        objective (str): 'accuracy', 'fairness_calibration', 'predictive_equality'
            or 'equal_opportunity'
        calibration_method (str): path component of the results file
        n_clusters (int): used in the file name for 'faircal', 'faircal-gmm' and 'fsn'
        measure (str): key of the calibration measure read for 'fairness_calibration'
        at_error: global error rate at which the subgroup error is evaluated;
            converted to float before interpolation
        subgroup (str): subgroup evaluated for the two equality objectives
        fpr_def (list of float, optional): FPR values at which TPR is reported for
            'accuracy'; defaults to [1e-3, 1e-2]

    Methods
        load_pickle: loads the results file into the `results` dictionary
        provide_data: returns a dataframe (one row per fold) for the stored objective
        get_sensitive_attributes_subgroups: returns the list of sensitive attributes
            and a dictionary mapping each attribute to its subgroups
    '''
    def __init__(self, dataset, feature, approach, objective, calibration_method='beta', n_clusters=100, measure='ece', at_error='1e-2', subgroup='African', fpr_def=None):
        self.dataset = dataset
        self.feature = feature
        self.approach = approach
        self.objective = objective
        self.calibration_method = calibration_method
        self.n_clusters = n_clusters
        self.measure = measure
        self.at_error = at_error
        self.subgroup = subgroup
        # Build the default per instance: a list literal in the signature would be
        # one object shared (and mutable) across every DataManager.
        self.fpr_def = [1e-3, 1e-2] if fpr_def is None else fpr_def
        self.errors = 'fpr at fpr' if objective == 'predictive_equality' else 'fnr at fnr'
        self.nbins = 25 if dataset == 'bfw' else 10
        self.att = 'att' if dataset == 'bfw' else 'ethnicity'
        self.key = 'calibration' if approach in ['faircal', 'baseline', 'faircal-gmm', 'oracle'] else 'pre_calibration'
        self.sensitive_attributes, self.subgroups = self.get_sensitive_attributes_subgroups(self.dataset)
        self.load_pickle()

    def load_pickle(self):
        """ Load the results dictionary for this configuration into self.results """
        filename = f'./experiments/{self.dataset}/{self.feature}/{self.approach}/{self.calibration_method}/nbins_{self.nbins}'
        if self.approach in ['faircal', 'faircal-gmm']:
            filename += f'_nclusters_{self.n_clusters}'
        if self.approach == 'fsn':
            filename += f'_nclusters_{self.n_clusters}_fpr_1e-03'
        # NOTE(security): allow_pickle=True unpickles arbitrary Python objects, so
        # only load result files that were produced by a trusted run.
        self.results = np.load(f'{filename}.npy', allow_pickle=True).item()

    def provide_data(self, objective):
        """
        Collect the per-fold data for the objective stored on this instance.

        The `objective` argument is kept so existing calls keep working, but the
        branch is chosen from self.objective only: self.errors is derived from
        self.objective in the constructor, so the two must not disagree.

        Returns a dataframe indexed by 'fold1'..'fold5'. If self.objective is not
        a known objective, a message is printed and the frame has no columns.
        """
        data = pd.DataFrame()
        data['folds'] = ['fold1', 'fold2', 'fold3', 'fold4', 'fold5']
        data = data.set_index('folds')

        if self.objective == 'accuracy':
            data['AUROC'] = np.nan
            data['TPR @ 0.1% FPR'] = np.nan
            data['TPR @ 1% FPR'] = np.nan
            for fold in range(1, 6):
                fold_results = self.results[f'fold{fold}']
                fpr = fold_results['fpr'][self.att]['Global'][self.key]
                tpr = fold_results['tpr'][self.att]['Global'][self.key]
                data.loc[f'fold{fold}', 'AUROC'] = sklearn.metrics.auc(fpr, tpr)
                # One interpolated TPR per entry of fpr_def fills the two TPR columns.
                data.iloc[fold-1, 1:] = np.interp(self.fpr_def, fpr, tpr)

        elif self.objective == 'fairness_calibration':
            for fold in range(1, 6):
                measure_results = self.results[f'fold{fold}'][self.measure]
                for sensitive_attribute in self.sensitive_attributes:
                    for subgroup in self.subgroups[sensitive_attribute]:
                        data.loc[f'fold{fold}', f'{subgroup}'] = measure_results[sensitive_attribute][subgroup]

        elif self.objective in ('predictive_equality', 'equal_opportunity'):
            data[self.at_error] = np.nan
            # The default at_error is the string '1e-2'; convert once so the
            # arithmetic and interpolation below always see a number.
            at_error = np.asarray(self.at_error, dtype=float)

            for fold in range(1, 6):
                if self.subgroup == 'Global':
                    # At the global threshold the global error is the target itself.
                    data.iloc[fold-1, :] = self.at_error
                else:
                    fold_results = self.results[f'fold{fold}']
                    fpr_global = fold_results['fpr'][self.att]['Global'][self.key]
                    tpr_global = fold_results['tpr'][self.att]['Global'][self.key]
                    thr_global = np.fmin(fold_results['thresholds'][self.att]['Global'][self.key], 1)

                    fpr = fold_results['fpr'][self.att][self.subgroup][self.key]
                    tpr = fold_results['tpr'][self.att][self.subgroup][self.key]
                    thr = np.fmin(fold_results['thresholds'][self.att][self.subgroup][self.key], 1)

                    # Step 1: threshold at which the global curve hits the target error.
                    # Step 2: the subgroup's error at that same threshold. The subgroup
                    # arrays are reversed before interpolating over thresholds
                    # (presumably stored in decreasing threshold order - confirm).
                    if self.errors == 'fpr at fpr':
                        thr_at_error = np.interp(at_error, fpr_global, thr_global)
                        data.iloc[fold-1, :] = np.interp(thr_at_error, thr[::-1], fpr[::-1])
                    elif self.errors == 'fnr at fnr':
                        thr_at_error = np.interp(1 - at_error, tpr_global, thr_global)
                        data.iloc[fold-1, :] = 1 - np.interp(thr_at_error, thr[::-1], tpr[::-1])
        else:
            print('Please specify a valid objective.')
        return data

    @staticmethod
    def get_sensitive_attributes_subgroups(dataset):
        '''
        Return the sensitive attributes of a dataset and the subgroups of each.

        'rfw' has the single attribute 'ethnicity'; any other dataset name gets
        the 'e' / 'g' / 'att' attributes.
        '''
        if dataset == 'rfw':
            sensitive_attributes = ['ethnicity']
            subgroups = {'ethnicity': ['African', 'Asian', 'Caucasian', 'Indian']}
        else:
            sensitive_attributes = ['e', 'g', 'att']
            subgroups = {
                'e': ['B', 'A', 'W', 'I'],
                'g': ['F', 'M'],
                'att': ['black_females', 'black_males', 'asian_females', 'asian_males', 'white_females', 'white_males', 'indian_females', 'indian_males']
            }
        return sensitive_attributes, subgroups
        

### Tables

#### A general multi-index table

In [3]:
def create_multi_table(objective, setting='global'):
    """
    Build the empty, multi-indexed frame that a results table is filled into.

    Returns (approaches, indices, data): the list of approaches, the
    dataset -> feature layout (with the subgroup list per feature for
    'fairness_calibration'), and a column-less dataframe whose rows are
    (dataset, feature, approach), or (dataset, feature, subgroup, approach)
    when objective is 'fairness_calibration' and setting is 'show_subgroups'.
    """
    approaches = ['baseline', 'fsn', 'faircal', 'faircal-gmm']

    if objective == 'fairness_calibration':
        rfw_groups = ['African', 'Asian', 'Caucasian', 'Indian']
        bfw_groups = [
            'B', 'A', 'W', 'I', 'F', 'M',
            'black_females', 'black_males', 'asian_females', 'asian_males',
            'white_females', 'white_males', 'indian_females', 'indian_males',
        ]
        # Every feature gets its own copy of the subgroup list.
        indices = {
            'rfw': {
                'facenet': list(rfw_groups),
                'facenet-webface': list(rfw_groups),
            },
            'bfw': {
                'facenet-webface': list(bfw_groups),
                'arcface': list(bfw_groups),
            },
        }
        per_subgroup = setting == 'show_subgroups'
    else:
        indices = {
            'rfw': ['facenet', 'facenet-webface'],
            'bfw': ['facenet-webface', 'arcface'],
        }
        per_subgroup = False

    if per_subgroup:
        level_names = ['dataset', 'feature', 'subgroup', 'approach']
        rows = [
            (dataset, feature, group, approach)
            for dataset, features in indices.items()
            for feature, groups in features.items()
            for group in groups
            for approach in approaches
        ]
    else:
        level_names = ['dataset', 'feature', 'approach']
        rows = [
            (dataset, feature, approach)
            for dataset, features in indices.items()
            for feature in features
            for approach in approaches
        ]

    index = pd.MultiIndex.from_tuples(rows, names=level_names)
    return approaches, indices, pd.DataFrame(index=index)

In [4]:
def _format_mean_std(values, scale=1):
    """ Format a series as 'mean (std)' with two decimals, both multiplied by scale """
    return f'{values.mean() * scale:.2f} ({values.std() * scale:.2f})'


def get_table_for(objective, global_error=1e-2, setting='global'):
    """
    Fill the table for one objective and arrange it for LaTeX conversion.

    objective: 'accuracy', 'fairness_calibration', 'predictive_equality' or
        'equal_opportunity'; anything else prints a message.
    global_error: global error rate passed to DataManager as at_error for the
        two equality objectives.
    setting: 'show_subgroups' keeps one row per subgroup for
        'fairness_calibration'; any other value aggregates over subgroups.

    Returns a dataframe with approaches as rows (subgroup and approach rows for
    'show_subgroups') and (dataset, feature, metric) as columns.
    """
    approaches, indices, data = create_multi_table(objective=objective, setting=setting)
    metrics = []

    if objective == 'accuracy':
        metrics = ['AUROC', 'TPR @ 0.1% FPR', 'TPR @ 1% FPR']
        for metric in metrics:
            data[metric] = ''
        error = [1e-3, 1e-2]

        for dataset in indices:
            for feature in indices[dataset]:
                for approach in approaches:
                    data_object = DataManager(dataset, feature, approach, objective, fpr_def=error)
                    data_work = data_object.provide_data(objective) * 100
                    for metric in metrics:
                        # Single .loc call with (row, column): the chained form
                        # data.loc[...][metric] = ... assigns into a temporary object.
                        data.loc[(dataset, feature, approach), metric] = _format_mean_std(data_work[metric])

    elif objective == 'fairness_calibration':
        metrics = ['mean', 'aad', 'mad', 'std']

        if setting == 'show_subgroups':
            for metric in metrics:
                data[metric] = np.nan
            for dataset in indices:
                for feature in indices[dataset]:
                    for approach in approaches:
                        # One load covers every subgroup; the result does not depend
                        # on which subgroup is being iterated.
                        data_object = DataManager(dataset, feature, approach, objective)
                        data_work = data_object.provide_data(objective) * 100
                        for subgroup in data_work.columns:
                            values = data_work[subgroup]
                            group_mean = values.mean()
                            deviation = np.abs(values - group_mean)
                            row = (dataset, feature, subgroup, approach)
                            data.loc[row, 'mean'] = group_mean
                            data.loc[row, 'aad'] = deviation.mean()
                            data.loc[row, 'mad'] = deviation.max()
                            # NOTE(review): np.std defaults to ddof=0 whereas the pandas
                            # .std() used in the other branches defaults to ddof=1 -
                            # confirm which one is intended.
                            data.loc[row, 'std'] = np.std(values)
        else:
            for metric in metrics:
                data[metric] = ''
            for dataset in indices:
                for feature in indices[dataset]:
                    for approach in approaches:
                        data_object = DataManager(dataset, feature, approach, objective)
                        data_work = data_object.provide_data(objective)

                        # Per fold: spread of the measure across subgroups.
                        fold_mean = data_work.mean(axis=1)
                        deviation = data_work.sub(fold_mean, axis=0).abs()
                        data_metric = {
                            'mean': fold_mean,
                            'aad': deviation.mean(axis=1),
                            'mad': deviation.max(axis=1),
                            'std': data_work.std(axis=1),
                        }

                        # Across folds: mean and std of each per-fold metric, in percent.
                        for metric in metrics:
                            data.loc[(dataset, feature, approach), metric] = _format_mean_std(data_metric[metric], scale=100)

    elif objective == 'predictive_equality' or objective == 'equal_opportunity':
        metrics = ['aad', 'mad', 'std']
        for metric in metrics:
            data[metric] = ''

        for dataset in indices:
            att = 'att' if dataset == 'bfw' else 'ethnicity'
            _, subgroups = DataManager.get_sensitive_attributes_subgroups(dataset)
            for feature in indices[dataset]:
                for approach in approaches:
                    folds_x_subgroups = pd.DataFrame()
                    for subgroup in subgroups[att]:
                        data_object = DataManager(dataset, feature, approach, objective, subgroup=subgroup, at_error=global_error)
                        folds_x_subgroups[subgroup] = data_object.provide_data(objective)

                    # Per fold: spread of the subgroup error rates around their mean.
                    fold_mean = folds_x_subgroups.mean(axis=1)
                    deviation = folds_x_subgroups.sub(fold_mean, axis=0).abs()
                    data_work = {
                        'aad': deviation.mean(axis=1),
                        'mad': deviation.max(axis=1),
                        'std': folds_x_subgroups.std(axis=1),
                    }
                    # Across folds: mean and std of each per-fold metric, in percent.
                    for metric in metrics:
                        data.loc[(dataset, feature, approach), metric] = _format_mean_std(data_work[metric], scale=100)
    else:
        print('Please specify a valid objective.')

    # Re-structure dataframe to have correct indices and column order
    data = data.reset_index()
    # With one row per subgroup, 'approach' alone does not identify a row, and
    # pivoting on it would fail with duplicate entries.
    row_levels = ['subgroup', 'approach'] if 'subgroup' in data.columns else 'approach'
    data = data.pivot(index=row_levels, columns=['dataset', 'feature']).reorder_levels(['dataset', 'feature', None], axis=1)

    good_order = [
        tup + (metric,)
        for tup in [('rfw', 'facenet'), ('rfw', 'facenet-webface'), ('bfw', 'facenet-webface'), ('bfw', 'arcface')]
        for metric in metrics
    ]
    data = data[good_order]

    return data

In [5]:
import re
def get_latex_for(df, objective):
    """
    Convert a results table to LaTeX source and tidy it for the report.

    df: object with a to_latex() method (a results dataframe).
    objective: for 'accuracy' the 'mean (std)' cells are rewritten with a LaTeX
        plus-minus sign between mean and std; for every other objective the
        parenthesised part is removed.

    Returns the LaTeX string with approach names capitalised, each prefixed by
    an extra '& ', and 'NaN' shown as '--'.
    """
    latex = df.to_latex()

    if objective == 'accuracy':
        # Raw string: '\p' is not a valid escape sequence in a normal literal.
        latex = latex.replace(' (', r'$\pm$')
        latex = latex.replace(')', '')

    # Drop whatever parenthesised text is left (the std part for non-accuracy tables).
    latex = re.sub(r'\((.*?)\)', '', latex)

    # Order matters: 'faircal' must be rewritten before the '-gmm' suffix.
    replacements = (
        ('baseline', '& Baseline'),
        ('agenda', '& AGENDA'),
        ('fsn', '& FSN'),
        ('faircal', '& FairCal'),
        ('oracle', '& Oracle'),
        ('-gmm', '-GMM'),
        ('NaN', '--'),
    )
    for old, new in replacements:
        latex = latex.replace(old, new)

    return latex

In [6]:
def add_authors_result(df, objective, global_error):
    """
    Put the hard-coded reference ('Authors') values next to our own results.

    df: table with approaches as rows and 3-level (dataset, feature, metric) columns.
    objective: selects the reference table; reference values exist for 'accuracy',
        'fairness_calibration' and 'predictive_equality'.
    global_error: for 'predictive_equality', selects the 1e-2 or the 1e-3 table.

    Returns a table with the rows of df and every column of df split into an
    'Authors' and an 'Ours' sub-column. Approaches without a reference value get
    NaN in 'Authors'. For objectives other than 'accuracy', parenthesised text
    (the std) is removed from every cell.
    """
    authors = pd.DataFrame(index=df.index, columns=df.columns)

    # Reference values per approach, in the column order of df.
    reported = {}
    if objective == 'accuracy':
        reported = {
            'baseline': ['88.26 (0.19)', '18.42 (1.28)', '34.88 (3.27)',
                         '83.95 (0.22)', '11.18 (3.45)', '26.04 (2.11)',
                         '96.06 (0.16)', '33.61 (2.10)', '58.87 (0.92)',
                         '97.41 (0.34)', '86.27 (1.09)', '90.11 (0.87)'],
            'faircal': ['90.58 (0.29)', '23.55 (1.82)', '41.88 (1.99)',
                        '86.71 (0.25)', '20.64 (3.09)', '33.13 (1.67)',
                        '96.90 (0.17)', '46.74 (1.49)', '69.21 (1.19)',
                        '97.44 (0.34)', '86.28 (1.24)', '90.14 (0.86)'],
            'agenda': ['76.83 (0.57)', '8.32 (1.86)', '18.01 (1.44)',
                       '74.51 (0.94)', '6.38 (0.78)', '14.98 (1.11)',
                       '82.42 (0.45)', '15.95 (1.53)', '32.51 (1.24)',
                       '95.09 (0.55)', '69.61 (2.40)', '79.67 (2.06)'],
            'fsn': ['90.05 (0.29)', '23.01 (2.00)', '40.21 (2.09)',
                    '85.84 (0.34)', '17.33 (3.01)', '32.90 (1.03)',
                    '96.77 (0.20)', '47.11 (1.23)', '69.92 (1.01)',
                    '97.35 (0.33)', '86.19 (1.13)', '90.06 (0.84)'],
            # NOTE(review): the third value used to read '411.83 (2.98)', which is
            # impossible for a percentage; assumed to be a typo for 41.83 -
            # confirm against the paper.
            'oracle': ['89.74 (0.31)', '21.40 (3.54)', '41.83 (2.98)',
                       '85.23 (0.18)', '16.71 (1.98)', '31.60 (1.08)',
                       '97.28 (0.13)', '45.13 (1.45)', '67.56 (1.05)',
                       '98.91 (0.12)', '86.41 (1.19)', '90.40 (0.91)'],
        }
    elif objective == 'fairness_calibration':
        reported = {
            'baseline': ['6.37', '2.89', '5.73', '3.77', '5.55', '2.48', '4.97', '2.91', '6.77', '3.63', '5.96', '4.03', '2.57', '1.39', '2.94', '1.63'],
            'faircal': ['1.37', '0.28', '0.50', '0.34', '1.75', '0.41', '0.64', '0.45', '3.09', '1.34', '2.48', '1.55', '2.49', '1.30', '2.68', '1.52'],
            'agenda': ['7.71', '3.11', '6.09', '3.86', '5.71', '2.37', '4.28', '2.85', '13.21', '6.37', '12.91', '7.55', '5.14', '2.48', '5.92', '3.04'],
            'fsn': ['1.43', '0.35', '0.57', '0.40', '2.49', '0.84', '1.19', '0.91', '2.76', '1.38', '2.67', '1.60', '2.65', '1.45', '3.23', '1.71'],
            'oracle': ['1.18', '0.28', '0.53', '0.33', '1.35', '0.38', '0.66', '0.43', '2.23', '1.15', '2.63', '1.40', '1.41', '0.59', '1.30', '0.69'],
        }
    elif objective == 'predictive_equality' and global_error == 1e-2:
        # Predictive equality at a global FPR of 1%
        reported = {
            'baseline': ['0.68', '1.02', '0.74', '0.67', '1.23', '0.79', '2.42', '7.48', '3.22', '0.72', '1.51', '0.85'],
            'faircal': ['0.28', '0.46', '0.32', '0.29', '0.57', '0.35', '0.80', '1.79', '0.95', '0.63', '1.46', '0.78'],
            'agenda': ['0.71', '1.14', '0.81', '0.73', '1.08', '0.78', '1.21', '3.09', '1.51', '0.65', '1.78', '0.84'],
            'fsn': ['0.37', '0.68', '0.46', '0.35', '0.61', '0.40', '0.87', '2.19', '1.05', '0.55', '1.27', '0.68'],
            'oracle': ['0.40', '0.69', '0.45', '0.41', '0.74', '0.48', '0.77', '1.71', '0.91', '0.83', '2.08', '1.07'],
        }
    elif objective == 'predictive_equality' and global_error == 1e-3:
        # Predictive equality at a global FPR of 0.1%
        reported = {
            'baseline': ['0.10', '0.15', '0.10', '0.14', '0.26', '0.16', '0.29', '1.00', '0.40', '0.12', '0.30', '0.15'],
            'faircal': ['0.09', '0.14', '0.10', '0.09', '0.16', '0.10', '0.09', '0.20', '0.11', '0.11', '0.31', '0.15'],
            'agenda': ['0.11', '0.20', '0.13', '0.12', '0.23', '0.14', '0.14', '0.40', '0.18', '0.09', '0.23', '0.11'],
            'fsn': ['0.10', '0.18', '0.11', '0.11', '0.23', '0.23', '0.09', '0.20', '0.11', '0.11', '0.28', '0.14'],
            'oracle': ['0.11', '0.19', '0.12', '0.11', '0.20', '0.13', '0.12', '0.25', '0.15', '0.12', '0.27', '0.14'],
        }

    for approach, values in reported.items():
        authors.loc[approach] = np.array(values)
    if reported:
        authors.loc['gmm-discrete'] = np.nan

    # Add Authors/Ours columns
    new_columns = []
    for col in df.columns:
        new_columns.append(col + ('Authors',))
        new_columns.append(col + ('Ours',))

    # Copy data; assignment aligns on df.index, so reference rows for approaches
    # that are not in df are left out.
    result = pd.DataFrame(index=df.index, columns=pd.MultiIndex.from_tuples(new_columns))
    for col in result.columns:
        if col[-1] == 'Ours':
            result[col] = df[col[:-1]]
        if col[-1] == 'Authors':
            result[col] = authors[col[:-1]]

    # Remove std if objective is not accuracy
    if objective != 'accuracy':
        for approach in result.index:
            result.loc[approach] = result.loc[approach].str.replace(r'\((.*?)\)', '', regex=True)
    return result

#### 1. Global accuracy measures

In [7]:
# Table 1: our accuracy results side by side with the reference values,
# printed as LaTeX source.
accuracy_table = add_authors_result(
    get_table_for('accuracy'),
    objective='accuracy',
    global_error=1e-2,
)
accuracy_in_latex = get_latex_for(accuracy_table, objective='accuracy')
print(accuracy_in_latex)

\begin{tabular}{lllllllllllllllllllllllll}
\toprule
{} & \multicolumn{12}{l}{rfw} & \multicolumn{12}{l}{bfw} \\
{} & \multicolumn{6}{l}{facenet} & \multicolumn{6}{l}{facenet-webface} & \multicolumn{6}{l}{facenet-webface} & \multicolumn{6}{l}{arcface} \\
{} & \multicolumn{2}{l}{AUROC} & \multicolumn{2}{l}{TPR @ 0.1\% FPR} & \multicolumn{2}{l}{TPR @ 1\% FPR} & \multicolumn{2}{l}{AUROC} & \multicolumn{2}{l}{TPR @ 0.1\% FPR} & \multicolumn{2}{l}{TPR @ 1\% FPR} & \multicolumn{2}{l}{AUROC} & \multicolumn{2}{l}{TPR @ 0.1\% FPR} & \multicolumn{2}{l}{TPR @ 1\% FPR} & \multicolumn{2}{l}{AUROC} & \multicolumn{2}{l}{TPR @ 0.1\% FPR} & \multicolumn{2}{l}{TPR @ 1\% FPR} \\
{} &       Authors &          Ours &        Authors &          Ours &       Authors &          Ours &         Authors &          Ours &        Authors &          Ours &       Authors &          Ours &         Authors &          Ours &        Authors &          Ours &       Authors &          Ours &       Authors &          Ours & 

  latex = df.to_latex()


#### 2. Fairness Calibration

In [12]:
# Table 2: fairness calibration, aggregated over subgroups, with the reference
# values added.
fairness_cal_table = add_authors_result(
    get_table_for(objective='fairness_calibration', setting='global'),
    objective='fairness_calibration',
    global_error=1e-2,
)
# Only the RFW / facenet-webface columns are printed.
fairness_cal_table = fairness_cal_table['rfw', 'facenet-webface']
fairness_cal_in_latex = get_latex_for(fairness_cal_table, objective='fairness_calibration')
print(fairness_cal_in_latex)

\begin{tabular}{lllllllll}
\toprule
{} & \multicolumn{2}{l}{mean} & \multicolumn{2}{l}{aad} & \multicolumn{2}{l}{mad} & \multicolumn{2}{l}{std} \\
{} & Authors &   Ours & Authors &   Ours & Authors &   Ours & Authors &   Ours \\
approach    &         &        &         &        &         &        &         &        \\
\midrule
& Baseline    &    5.55 &  7.08  &    2.48 &  2.66  &    4.97 &  4.93  &    2.91 &  3.66  \\
& FairCal     &    1.75 &  3.76  &    0.41 &  0.79  &    0.64 &  1.40  &    0.45 &  1.06  \\
& FairCal-GMM &     -- &  3.67  &     -- &  0.55  &     -- &  1.07  &     -- &  0.78  \\
& FSN         &    2.49 &  3.90  &    0.84 &  0.54  &    1.19 &  1.05  &    0.91 &  0.75  \\
\bottomrule
\end{tabular}



  fairness_cal_table = fairness_cal_table['rfw', 'facenet-webface']
  latex = df.to_latex()


#### 3. Predictive Equality ('fpr at fpr') and 4. Equal Opportunity ('fnr at fnr')

In [None]:
# Only equal opportunity is enabled here. To also reproduce the predictive
# equality table, use ['predictive_equality', 'equal_opportunity'].
objectives = ['equal_opportunity']
global_errors = [1e-3, 1e-2]

for objective in objectives:
    for global_error in global_errors:
        current_df = get_table_for(objective, global_error=global_error)
        print(f'\n{objective} at a global error rate = {global_error * 100}%')
        # Hand the dataframe to the notebook's display() directly; the HTML
        # wrapper used before is not imported anywhere in this notebook.
        display(current_df)

# current_df is the table from the last loop iteration.
eq_opp_in_latex = get_latex_for(current_df, 'equal_opportunity')
print(eq_opp_in_latex)

### Visualising example results for predictive equality and equal opportunity

### Example conversion from dataframe to latex

In [None]:
# Plain pandas conversion of the table currently held in current_df, without
# the label clean-up that get_latex_for applies.
txt = current_df.to_latex()
print(txt)