In [2]:
import pandas as pd
import numpy as np

In [4]:
# Read the pre-split COMPAS tables (train is the normalized variant).
compas_train = pd.read_csv('./data/compas_train_normalized.csv')
compas_val = pd.read_csv('./data/compas_val.csv')
compas_test = pd.read_csv('./data/compas_test.csv')

# `pop` removes the label column from the frames in place, so the frames that
# remain hold every other column (including 'race') and serve as X_train / X_test.
y_train = compas_train.pop('two_year_recid')
y_test = compas_test.pop('two_year_recid')
X_train, X_test = compas_train, compas_test

# Sensitive attribute: swap the numeric race codes for readable group names
# (0 -> 'African-American', 1 -> 'Caucasian').
sensitive_features_train = (
    X_train['race']
    .replace(0, 'African-American')
    .replace(1, 'Caucasian')
)
sensitive_features_test = (
    X_test['race']
    .replace(0, 'African-American')
    .replace(1, 'Caucasian')
)

In [9]:
# Dummy predictions used to exercise the fairness metrics: the first 200 test
# rows are predicted 0.0 and every remaining row is predicted 1.0.
n = len(sensitive_features_test)
y_pred = (np.arange(n) >= 200).astype(float)

In [54]:
def evaluate_fairness(y_true, y_pred, sensitive_features):
    """
    Compute per-group fairness rates and between-group gaps for binary predictions.

    The three inputs are matched row by row (positionally), so any pandas index
    on them is ignored. For every distinct value in ``sensitive_features`` the
    following rates are computed as the mean of ``y_pred`` over the selected rows:

    * ``'dp'``    -- demographic parity: all rows of the group.
    * ``'eo_y0'`` -- equalized odds, rows of the group with ``y_true == 0``.
    * ``'eo_y1'`` -- equalized odds, rows of the group with ``y_true == 1``.

    NOTE (kept from the original author): group conventions are defined in the
    meta_algo file, but we chose:
    a0 := African-American (COMPAS), Female (Adult)
    a1 := Caucasian (COMPAS), Male (Adult)

    :param y_true: array-like of ground-truth labels (compared against 0 and 1).
    :param y_pred: array-like of predictions, same length as ``y_true``.
    :param sensitive_features: array-like of group membership, same length.
    :return: tuple ``(groups, group_metrics, gaps)`` where
        ``groups`` is the sorted numpy array of distinct group values,
        ``group_metrics`` is a dict of dicts ``{metric: {group: rate}}``, and
        ``gaps`` is a dict ``{metric: largest rate - smallest rate}``; for two
        groups this equals the absolute difference between them.
        A rate is NaN when its subgroup has no rows, and a gap is NaN when any
        of its rates is NaN or when there are no groups at all.
    :raises ValueError: if the three inputs do not have the same length.
    """
    # Work on plain numpy arrays so that selection is purely positional; mixing
    # pandas index labels with numpy positional indexing breaks as soon as the
    # index is not 0..n-1 (e.g. after filtering or a shuffled split).
    membership = np.asarray(sensitive_features)
    truth = np.asarray(y_true)
    preds = np.asarray(y_pred)

    if not (len(membership) == len(truth) == len(preds)):
        raise ValueError(
            "y_true, y_pred and sensitive_features must have the same length; "
            "got %d, %d and %d" % (len(truth), len(preds), len(membership))
        )

    def positive_rate(mask):
        # Mean prediction over the selected rows; NaN (not a crash) if none.
        return preds[mask].mean() if mask.any() else np.nan

    def gap(rates):
        # Spread between the most and least favoured group; NaN propagates.
        values = np.array(list(rates.values()), dtype=float)
        return values.max() - values.min() if values.size else np.nan

    groups = np.unique(membership)
    is_positive = truth == 1
    is_negative = truth == 0

    dp_pct = {}
    eo_y0_pct = {}
    eo_y1_pct = {}
    for group in groups:
        in_group = membership == group
        dp_pct[group] = positive_rate(in_group)                   # Demographic Parity
        eo_y0_pct[group] = positive_rate(in_group & is_negative)  # Equalized Odds, y = 0
        eo_y1_pct[group] = positive_rate(in_group & is_positive)  # Equalized Odds, y = 1

    # a dictionary of dictionaries: metric name -> {group -> rate}
    group_metrics = {'dp': dp_pct, 'eo_y0': eo_y0_pct, 'eo_y1': eo_y1_pct}
    gaps = {metric: gap(rates) for metric, rates in group_metrics.items()}

    return groups, group_metrics, gaps

In [53]:
# Fairness report on the test split: returns (groups, per-group rates, gaps).
evaluate_fairness(
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=sensitive_features_test,
)

(array(['African-American', 'Caucasian'], dtype=object),
 {'dp': {'African-American': 0.6080246913580247,
   'Caucasian': 0.6421568627450981},
  'eo_y0': {'African-American': 0.6595744680851063,
   'Caucasian': 0.6585365853658537},
  'eo_y1': {'African-American': 0.5683060109289617,
   'Caucasian': 0.6172839506172839}},
 {'dp': 0.03413217138707336,
  'eo_y0': 0.0010378827192526474,
  'eo_y1': 0.0489779396883222})