# In [1]:
import numpy as np
import pandas as pd
from sklearn import metrics

# In [2]:
def predict(prob, threshold=0.5):
    """Turn a predicted probability into a hard binary decision.

    Args:
        prob: Predicted score for the positive class. Any object that
            supports ``>=`` against a number is accepted (for NumPy arrays
            the comparison is element-wise).
        threshold: Decision cut-off. Defaults to 0.5, the value that was
            previously hard-coded. A score exactly equal to the threshold
            counts as positive.

    Returns:
        The result of ``prob >= threshold``.
    """
    return prob >= threshold

def get_accuracy(y_pred, y):
    """Return the percentage of entries whose thresholded prediction equals the label.

    Args:
        y_pred: Indexable collection of predicted scores; each entry is
            passed through ``predict`` before comparison.
        y: Indexable collection of true labels. Its length determines how
            many positions are compared.

    Returns:
        Accuracy as a percentage (matches * 100.0 / len(y)).

    Note:
        Division by ``len(y)`` means an empty ``y`` is not handled here.
    """
    total = len(y)
    matches = 0
    for idx in range(total):
        if predict(y_pred[idx]) == y[idx]:
            matches += 1
    return matches * 100.0 / total

# 1 - METRICS FROM WADSWORTH PAPER
def get_roc_auc_score(y_pred, y):
    """Return the ROC AUC of the scores ``y_pred`` against the labels ``y``.

    Thin wrapper around scikit-learn's ``roc_auc_score`` that accepts the
    arguments in this file's ``(y_pred, y)`` order and forwards them in the
    ``(y, y_pred)`` order used by the library call.

    Args:
        y_pred: Predicted scores.
        y: True labels.

    Returns:
        Whatever ``metrics.roc_auc_score(y, y_pred)`` returns.
    """
    # The file binds ``metrics`` via ``from sklearn import metrics``; the bare
    # name ``sklearn`` is never imported, so the call must go through
    # ``metrics`` to avoid a NameError.
    return metrics.roc_auc_score(y, y_pred)

# z: protected attribute (array of 0s and 1s); y_select: the predicted outcome (0 or 1) whose rate is compared between the two groups
def demographic_parity_gap(y_pred, z, y_select):
    """Return the absolute between-group gap in the rate of predicting ``y_select``.

    For each protected group (``z == 0`` and ``z == 1``) this computes the
    fraction of that group's members whose thresholded prediction equals
    ``y_select``, then returns the absolute difference of the two fractions.

    Args:
        y_pred: Sequence of predicted scores; each is thresholded with
            ``predict``.
        z: Sequence of protected-attribute values (0 or 1), paired
            positionally with ``y_pred``.
        y_select: The predicted outcome (0 or 1) whose rate is compared.

    Returns:
        ``|rate_0 - rate_1|`` as computed by ``np.abs``.

    Raises:
        ValueError: If ``y_pred`` and ``z`` differ in length, or if either
            group has no members (the rate would be undefined).
    """
    if len(y_pred) != len(z):
        raise ValueError("y_pred and z must have the same length")

    # Group sizes are counted directly. The previous denominators used
    # ``sum(z[i])`` with ``i`` only defined inside a generator expression,
    # which raised NameError at call time.
    size_0 = size_1 = 0
    hits_0 = hits_1 = 0
    for prob, group in zip(y_pred, z):
        selected = predict(prob) == y_select
        if group == 0:
            size_0 += 1
            if selected:
                hits_0 += 1
        elif group == 1:
            size_1 += 1
            if selected:
                hits_1 += 1

    if size_0 == 0 or size_1 == 0:
        raise ValueError("both protected groups (z == 0 and z == 1) must be non-empty")

    return np.abs(hits_0 / size_0 - hits_1 / size_1)

def false_positive_gap(y_pred, y, z):
    """Return the absolute between-group gap in false-positive frequency.

    An entry counts as a false positive when its thresholded prediction is
    greater than its label (for binary labels: predicted 1, actual 0). For
    each protected group the count is divided by the size of that group, and
    the absolute difference of the two ratios is returned.

    NOTE(review): the ratio is normalised by group size, as in the original
    code, not by the number of actual negatives in the group. Confirm this
    matches the definition intended from the paper.

    Args:
        y_pred: Sequence of predicted scores; each is thresholded with
            ``predict``.
        y: Sequence of true labels, paired positionally with ``y_pred``.
        z: Sequence of protected-attribute values (0 or 1), paired
            positionally with ``y_pred``.

    Returns:
        ``|fp_0 - fp_1|`` as computed by ``np.abs``.

    Raises:
        ValueError: If the inputs differ in length, or if either group has
            no members.
    """
    if not (len(y_pred) == len(y) == len(z)):
        raise ValueError("y_pred, y and z must have the same length")

    size_0 = size_1 = 0
    fp_count_0 = fp_count_1 = 0
    for prob, label, group in zip(y_pred, y, z):
        # Threshold first, then compare with the label. The earlier form
        # ``predict(y_pred[i] > y[i])`` thresholded the comparison result and
        # so counted any score above the label, e.g. 0.3 vs. label 0.
        is_false_pos = predict(prob) > label
        if group == 0:
            size_0 += 1
            if is_false_pos:
                fp_count_0 += 1
        elif group == 1:
            size_1 += 1
            if is_false_pos:
                fp_count_1 += 1

    # The previous denominators used ``sum(z[i])`` with ``i`` undefined at
    # function scope (NameError); group sizes are counted directly instead.
    if size_0 == 0 or size_1 == 0:
        raise ValueError("both protected groups (z == 0 and z == 1) must be non-empty")

    return np.abs(fp_count_0 / size_0 - fp_count_1 / size_1)

def false_negative_gap(y_pred, y, z):
    """Return the absolute between-group gap in false-negative frequency.

    An entry counts as a false negative when its thresholded prediction is
    less than its label (for binary labels: predicted 0, actual 1). For each
    protected group the count is divided by the size of that group, and the
    absolute difference of the two ratios is returned.

    NOTE(review): the ratio is normalised by group size, as in the original
    code, not by the number of actual positives in the group. Confirm this
    matches the definition intended from the paper.

    Args:
        y_pred: Sequence of predicted scores; each is thresholded with
            ``predict``.
        y: Sequence of true labels, paired positionally with ``y_pred``.
        z: Sequence of protected-attribute values (0 or 1), paired
            positionally with ``y_pred``.

    Returns:
        ``|fn_0 - fn_1|`` as computed by ``np.abs``.

    Raises:
        ValueError: If the inputs differ in length, or if either group has
            no members.
    """
    if not (len(y_pred) == len(y) == len(z)):
        raise ValueError("y_pred, y and z must have the same length")

    size_0 = size_1 = 0
    fn_count_0 = fn_count_1 = 0
    for prob, label, group in zip(y_pred, y, z):
        # Threshold first, then compare with the label. The earlier form
        # ``predict(y_pred[i] < y[i])`` thresholded the comparison result and
        # so counted any score below the label, e.g. 0.9 vs. label 1.
        is_false_neg = predict(prob) < label
        if group == 0:
            size_0 += 1
            if is_false_neg:
                fn_count_0 += 1
        elif group == 1:
            size_1 += 1
            if is_false_neg:
                fn_count_1 += 1

    # The previous denominators used ``sum(z[i])`` with ``i`` undefined at
    # function scope (NameError); group sizes are counted directly instead.
    if size_0 == 0 or size_1 == 0:
        raise ValueError("both protected groups (z == 0 and z == 1) must be non-empty")

    return np.abs(fn_count_0 / size_0 - fn_count_1 / size_1)
    
# 2 - METRICS FROM ZHANG PAPER (somewhat redundant, but coded again for simplicity)
def confusion_matrix(y_pred, y, z, z_select):
    """Count confusion-matrix cells for the members of one protected group.

    Only positions where ``z[i] == z_select`` are considered. For those, the
    score is thresholded with ``predict`` and compared with the label.

    Args:
        y_pred: Indexable collection of predicted scores.
        y: Indexable collection of true labels.
        z: Indexable collection of protected-attribute values; its length
            determines how many positions are scanned.
        z_select: The protected-attribute value whose group is tallied.

    Returns:
        ``[true_pos, true_neg, false_pos, false_neg]`` as a list of counts.
    """
    true_pos = true_neg = false_pos = false_neg = 0

    # Single indexed pass: the three inputs are read in parallel by position,
    # and y / y_pred are only touched for members of the selected group.
    for i in range(len(z)):
        if not (z[i] == z_select):
            continue
        label = y[i]
        decision = predict(y_pred[i])
        if label == 1 and decision == 1:
            true_pos += 1
        if label == 0 and decision == 0:
            true_neg += 1
        if decision > label:
            false_pos += 1
        if label > decision:
            false_neg += 1

    return [true_pos, true_neg, false_pos, false_neg]
