In [1]:
import pandas as pd
import os


In [2]:
def demographic_parity(feature, target, data):
    """Return the demographic-parity ratio of ``target`` across ``feature`` groups.

    For every distinct value of ``data[feature]`` the mean of ``data[target]``
    is computed (the positive rate when ``target`` is coded 0/1), and the
    ratio ``max_rate / min_rate`` over all groups is returned. A value of 1.0
    means every group has the same rate.

    Parameters
    ----------
    feature : str
        Name of the column holding the group membership.
    target : str
        Name of the column whose per-group mean is compared.
    data : pandas.DataFrame
        Frame containing both columns.

    Returns
    -------
    float
        ``max_rate / min_rate``, or ``float('inf')`` when the lowest group
        rate is exactly 0.

    Raises
    ------
    ValueError
        If no group has a computable rate (e.g. ``data`` is empty).
    """
    # groupby skips rows whose feature value is missing, and dropna() removes
    # groups whose mean is undefined. The previous manual loop kept such
    # groups as NaN rates, which made max()/min() order-dependent and could
    # turn the whole result into NaN.
    positive_rates = data.groupby(feature, observed=True)[target].mean().dropna()
    if positive_rates.empty:
        raise ValueError(
            "demographic_parity: no group in %r has a computable rate for %r"
            % (feature, target)
        )
    max_rate = positive_rates.max()
    min_rate = positive_rates.min()
    if min_rate == 0:
        # Division by zero is reported as infinite disparity.
        return float('inf')
    return max_rate / min_rate

In [3]:
def equalized_opportunity(feature, target, data, prediction=None):
    """Return the ratio of the highest to the lowest per-group true-positive rate.

    For each group in ``data[feature]`` the rows with ``data[target] == 1``
    are selected and the mean of the ``prediction`` column over those rows is
    taken as the group's true-positive rate (this equals the TPR when the
    predictions are coded 0/1). Groups without any positive row are skipped.

    Parameters
    ----------
    feature : str
        Name of the column holding the group membership.
    target : str
        Name of the ground-truth label column; positives are rows equal to 1.
    data : pandas.DataFrame
        Frame containing the columns above.
    prediction : str, optional
        Name of the column holding the classifier's predictions. When omitted
        the ``target`` column itself is used, which reproduces the historical
        behaviour of this function: every group's rate is then 1.0 by
        construction and the result is always 1.0.

    Returns
    -------
    float
        ``max_tpr / min_tpr``, or ``float('inf')`` when the lowest group rate
        is exactly 0.

    Raises
    ------
    ValueError
        If no group contains a positive row.
    """
    # Backward-compatible fallback: callers that pass no prediction column get
    # the same (trivial) result they always got.
    outcome = target if prediction is None else prediction

    positives = data[data[target] == 1]
    # Groups with no positive rows simply do not appear in the grouped result.
    tpr_rates = positives.groupby(feature, observed=True)[outcome].mean().dropna()
    if tpr_rates.empty:
        raise ValueError(
            "equalized_opportunity: no group in %r has rows with %r == 1"
            % (feature, target)
        )
    max_tpr = tpr_rates.max()
    min_tpr = tpr_rates.min()
    if min_tpr == 0:
        # Division by zero is reported as infinite disparity.
        return float('inf')
    return max_tpr / min_tpr

In [4]:
def equalized_odds(feature, target, data, prediction=None):
    """Return the worse of the TPR and FPR disparity ratios across groups.

    For each group in ``data[feature]``:

    * the true-positive rate is the mean of the ``prediction`` column over
      rows with ``data[target] == 1``;
    * the false-positive rate is the mean of the ``prediction`` column over
      rows with ``data[target] == 0``.

    (These are the TPR/FPR when predictions are coded 0/1.) Groups lacking
    positive rows are left out of the TPR comparison, and groups lacking
    negative rows are left out of the FPR comparison. Each disparity is
    ``max / min`` over the groups, and the larger of the two is returned.

    Parameters
    ----------
    feature : str
        Name of the column holding the group membership.
    target : str
        Name of the ground-truth label column (compared against 1 and 0).
    data : pandas.DataFrame
        Frame containing the columns above.
    prediction : str, optional
        Name of the column holding the classifier's predictions. When omitted
        the ``target`` column itself is used, which reproduces the historical
        behaviour of this function: every FPR is then 0 by construction, so
        the result is always ``float('inf')``.

    Returns
    -------
    float
        ``max(tpr_ratio, fpr_ratio)``; a ratio whose minimum rate is exactly 0
        is reported as ``float('inf')``.

    Raises
    ------
    ValueError
        If no group has positive rows, or no group has negative rows.
    """
    # Backward-compatible fallback: callers that pass no prediction column get
    # the same result they always got.
    outcome = target if prediction is None else prediction

    def _group_rates(label_value):
        # Mean prediction per group, restricted to rows with the given label.
        subset = data[data[target] == label_value]
        return subset.groupby(feature, observed=True)[outcome].mean().dropna()

    def _disparity(rates):
        # max/min ratio, with a zero minimum reported as infinite disparity.
        lowest = rates.min()
        return float('inf') if lowest == 0 else rates.max() / lowest

    tpr_rates = _group_rates(1)
    fpr_rates = _group_rates(0)
    if tpr_rates.empty:
        raise ValueError(
            "equalized_odds: no group in %r has rows with %r == 1" % (feature, target)
        )
    if fpr_rates.empty:
        raise ValueError(
            "equalized_odds: no group in %r has rows with %r == 0" % (feature, target)
        )
    return max(_disparity(tpr_rates), _disparity(fpr_rates))

In [5]:
import numpy as np
def individual_fairness(df, classifier_outputs, target, omitted, epsilon=0.1, delta=0.5):
    """Return the largest output gap between any two near-identical individuals.

    Every pair of rows whose feature vectors are closer than ``epsilon`` in
    Mahalanobis distance is considered "similar"; the function returns the
    largest absolute difference in ``classifier_outputs`` among such pairs.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per individual. All columns except ``target`` and ``omitted``
        are used as the feature vector.
    classifier_outputs : sequence
        Classifier output per individual, in the same row order as ``df``.
        Accessed by position, so a pandas Series with any index works.
    target : str
        Name of the label column, excluded from the feature vector.
    omitted : iterable of str
        Further column names to exclude from the feature vector.
    epsilon : float, default 0.1
        Mahalanobis distance below which two individuals count as similar.
    delta : float, default 0.5
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    number
        The maximum absolute output difference among similar pairs, or ``0``
        when no pair is within ``epsilon`` (including fewer than two rows).

    Raises
    ------
    numpy.linalg.LinAlgError
        If the feature covariance matrix is singular.
    """
    from itertools import combinations
    from scipy.spatial import distance

    feature_vectors = df.drop(columns=[target, *omitted]).values
    # With fewer than two individuals there are no pairs to compare, and the
    # covariance would be undefined, so answer before computing it.
    if len(feature_vectors) < 2:
        return 0

    # Positional access: a Series indexed by labels would otherwise be looked
    # up by label instead of by row position.
    outputs = np.asarray(classifier_outputs)

    # The inverse covariance is the same for every pair, so compute it once.
    # atleast_2d covers the single-feature case where np.cov returns a 0-d array.
    inverse_covariance = np.linalg.inv(np.atleast_2d(np.cov(feature_vectors.T)))

    max_diff = 0
    for (i, vec1), (j, vec2) in combinations(enumerate(feature_vectors), 2):
        if distance.mahalanobis(vec1, vec2, inverse_covariance) < epsilon:
            diff = abs(outputs[i] - outputs[j])
            if diff > max_diff:
                max_diff = diff
    return max_diff
