In [1]:
import pandas as pd
import numpy as np
from fair_logistic_reg import FairLogisticRegression
from custom_log_reg import CustomLogisticRegression
from sklearn.linear_model import LogisticRegression as LogReg
import utils
from autograd import grad

In [2]:
class test():
    def func(self, x, y):
        return 2*x*y
    
    def grad_func(self, x, y):
        g = grad(self.func,0)
        return g(x,y)

In [3]:
def func(x, y):
    return 2*x*y
    
def grad_func(x, y):
    g = grad(func,0)
    return g(x,y)

In [4]:
grad_func(2.0,3.0)

6.0

In [5]:
t = test()

In [6]:
t.grad_func(2.0,3.0)

6.0

In [7]:
compas = utils.load_compas_alt()["train"]
RESPONSE = "two_year_recid"
SENSITIVE = "is_Caucasian"

In [8]:
compas.dtypes

priors_count                  int64
crime_factor                  int64
gender_factor                 int64
is_Caucasian                  int64
age_factor_Greater than 45    uint8
age_factor_Less than 25       uint8
two_year_recid                int64
dtype: object

In [9]:
for col in compas.columns:
    compas[col] = compas[col].apply(lambda x: float(x))

In [10]:
compas.dtypes

priors_count                  float64
crime_factor                  float64
gender_factor                 float64
is_Caucasian                  float64
age_factor_Greater than 45    float64
age_factor_Less than 25       float64
two_year_recid                float64
dtype: object

In [11]:
x = compas.drop(RESPONSE, axis = 1)
y = compas[RESPONSE]
z = compas[SENSITIVE]

In [12]:
clr = CustomLogisticRegression()
flr = FairLogisticRegression()
sklr = LogReg()

In [13]:
clr.fit(x,y, epochs=100)

100%|██████████| 100/100 [00:28<00:00,  3.47it/s]


In [14]:
flr.fit(x,y,z, epochs=100)

100%|██████████| 100/100 [02:12<00:00,  1.32s/it]


In [15]:
sklr.fit(x,y)

LogisticRegression()

In [16]:
compas_test = utils.load_compas_alt()["test"]
x_test = compas_test.drop(RESPONSE, axis = 1)
y_test = compas_test[RESPONSE]
z_test = compas_test[SENSITIVE]

In [17]:
from sklearn.metrics import accuracy_score
pred = flr.predict(x_test)
accuracy = accuracy_score(y_test, pred)
print("Fair log reg accuracy: ", accuracy)
pred = clr.predict(x_test)
accuracy = accuracy_score(y_test, pred)
print("Custom log reg accuracy: ", accuracy)
pred = sklr.predict(x_test)
accuracy = accuracy_score(y_test, pred)
print("Sklearn log reg accuracy: ", accuracy)

Fair log reg accuracy:  0.6057928325969563
Custom log reg accuracy:  0.6008836524300442
Sklearn log reg accuracy:  0.6681394207167403


In [18]:
def spd(pred, protected_class, positive=True):
        """
        Equation: |P(Y_pred = y | Z = 1) - P(Y_pred = y | Z = 0)|
        Assumes that the positive class is the desired outcome and
            that the protected_class is 0/1 binary"""
        z_1 = [y_hat for y_hat, z in zip(
            pred, np.array(protected_class)) if z == 1]
        z_0 = [y_hat for y_hat, z in zip(
            pred, np.array(protected_class)) if z == 0]

        if not positive:
            z_1 = [0 if z == 1 else 1 for z in z_1]
            z_0 = [0 if z == 1 else 1 for z in z_1]
        """if len(z_1)+len(z_0)!=len(pred):
            print("NOT EQUAL")"""
        return abs(sum(z_1)/len(z_1)-sum(z_0)/len(z_0))
    
def eo_sum(pred, prot, true):
    """
    Equation: |P(Y_pred = y_pred | Y_true = y_true, Z = 1) - P(Y_pred = y_pred | Y_true = y_true, Z = 0)|
    Assumes prot is 0/1 binary"""
    z1_y0 = [y_hat for y_hat, z, y in zip(
        pred, prot, true) if z == 1 and y == 0]
    z0_y0 = [y_hat for y_hat, z, y in zip(
        pred, prot, true) if z == 0 and y == 0]
    z1_y1 = [y_hat for y_hat, z, y in zip(
        pred, prot, true) if z == 1 and y == 1]
    z0_y1 = [y_hat for y_hat, z, y in zip(
        pred, prot, true) if z == 0 and y == 1]
    return abs(sum(z1_y1)/len(z1_y1)-sum(z0_y1)/len(z0_y1)) + abs(sum(z1_y0)/len(z1_y0)-sum(z0_y0)/len(z0_y0))



In [19]:
pred = flr.predict(x_test)
accuracy = spd(pred, z_test)
print("Fair log reg spd: ", accuracy)
pred = clr.predict(x_test)
accuracy = spd(pred, z_test)
print("Custom log reg spd: ", accuracy)
pred = sklr.predict(x_test)
accuracy = spd(pred, z_test)
print("Sklearn log reg spd: ", accuracy)

Fair log reg spd:  0.272803226735811
Custom log reg spd:  0.19035500496174657
Sklearn log reg spd:  0.21888984922692778


In [20]:
#eosum test
flr = FairLogisticRegression(fairness_metric = "eo_sum")
flr.fit(x,y,z, epochs=100)
pred = flr.predict(x_test)
accuracy = eo_sum(pred, z_test, y_test)
print("Fair log reg eo_sum: ", accuracy)
pred = clr.predict(x_test)
accuracy = eo_sum(pred, z_test, y_test)
print("Custom log reg eo_sum: ", accuracy)
pred = sklr.predict(x_test)
accuracy = eo_sum(pred, z_test, y_test)
print("Sklearn log reg eo_sum: ", accuracy)

100%|██████████| 100/100 [02:12<00:00,  1.33s/it]

Fair log reg eo_sum:  0.5105854635097686
Custom log reg eo_sum:  0.3426659933206489
Sklearn log reg eo_sum:  0.4101811935832253





In [21]:
clr.weights

array([-7975.05340895,  3595.52287777,  4374.79486456,  3181.31609592,
        2509.69646316,   432.81716511])

In [22]:
flr.weights

array([-417.34245653,  195.68835849,  203.66933915,  221.23483332,
        135.19105549,   -4.11789897])

In [23]:
flr.predict(x_test)

[0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
