In [36]:
import pandas as pd
import numpy as np

# Which dataset to load: 'compas' or 'adult'.
dataset_used = 'compas'

if dataset_used == 'compas':
    compas_train = pd.read_csv('./../../data/compas_train.csv')
    compas_val = pd.read_csv('./../../data/compas_val.csv')
    compas_test = pd.read_csv('./../../data/compas_test.csv')

    # pop() removes the target column in place, so the frames that remain
    # hold everything except the label (the sensitive column stays in X).
    y_train = compas_train.pop('two_year_recid')
    y_test = compas_test.pop('two_year_recid')
    X_train = compas_train
    X_test = compas_test

    # Replace the numeric race codes with readable group names in one pass;
    # values other than 0/1 are left untouched, exactly as with chained replaces.
    race_names = {0: 'African-American', 1: 'Caucasian'}
    sensitive_features_train = X_train['race'].replace(race_names)
    sensitive_features_test = X_test['race'].replace(race_names)

elif dataset_used == 'adult':
    adult_train = pd.read_csv('./../../data/adult_train.csv')
    adult_val = pd.read_csv('./../../data/adult_val.csv')
    adult_test = pd.read_csv('./../../data/adult_test.csv')

    y_train = adult_train.pop('Income Binary')
    y_test = adult_test.pop('Income Binary')
    X_train = adult_train
    X_test = adult_test

    # Same treatment for the numeric sex codes.
    sex_names = {0: 'Female', 1: 'Male'}
    sensitive_features_train = X_train['sex'].replace(sex_names)
    sensitive_features_test = X_test['sex'].replace(sex_names)

else:
    # Fail loudly: merely printing would let later cells run against
    # undefined (or stale, from a previous kernel run) X_train / y_train.
    raise ValueError(
        f"Invalid dataset_used variable: {dataset_used!r}. Expected 'compas' or 'adult'."
    )

In [37]:
# Peek at the training row labelled 0 together with its target value.
(X_train.loc[0], y_train.loc[0])

(Unnamed: 0                  7796.0
 sex                            0.0
 race                           0.0
 age_cat=25 to 45               1.0
 age_cat=Greater than 45        0.0
 age_cat=Less than 25           0.0
 priors_count=0                 0.0
 priors_count=1 to 3            1.0
 priors_count=More than 3       0.0
 c_charge_degree=F              1.0
 c_charge_degree=M              0.0
 Name: 0, dtype: float64,
 1.0)

In [38]:
import cvxpy as cp

def log_loss_function(pred, y, weights=None, eps=1e-15):
    """Binary cross-entropy (log loss) of predicted probabilities.

    Parameters
    ----------
    pred : array-like
        Predicted probabilities; clipped to ``[eps, 1 - eps]`` so that the
        logarithms stay finite.
    y : array-like
        Targets, used as multipliers of the two log terms (``y`` and ``1 - y``).
    weights : array-like or None, optional
        Per-sample weights. When given, the result is ``np.dot(weights, loss)``
        (the weights are used as-is, not renormalised). When ``None``, the
        result is the unweighted mean ``loss.sum() / len(y)``.
    eps : float, optional
        Clipping margin applied to ``pred``.

    Returns
    -------
    float
        The weighted sum or the mean of the per-sample losses.
    """
    n = len(y)

    pred = np.clip(pred, eps, 1 - eps)
    class1_cost = -y * np.log(pred)
    class0_cost = -(1 - y) * np.log(1 - pred)
    loss = class1_cost + class0_cost

    # Test for "weights supplied", not for their truthiness: `weights.all()`
    # crashes on the default None and wrongly discards any weight vector
    # that contains a zero entry.
    if weights is not None:
        loss = np.dot(weights, loss)
    else:
        loss = loss.sum() / n

    return loss

def log_loss_grad_w(pred, y, eps=1e-15):
    """Per-sample binary cross-entropy, returned as a NumPy array.

    Parameters
    ----------
    pred : array-like
        Predicted probabilities; clipped to ``[eps, 1 - eps]`` so that the
        logarithms stay finite.
    y : array-like
        Targets, used as multipliers of the two log terms (``y`` and ``1 - y``).
    eps : float, optional
        Clipping margin applied to ``pred``.

    Returns
    -------
    numpy.ndarray
        One loss value per sample (no reduction is applied).
    """
    pred = np.clip(pred, eps, 1 - eps)
    class1_cost = -y * np.log(pred)
    class0_cost = -(1 - y) * np.log(1 - pred)
    loss = class1_cost + class0_cost

    # np.asarray handles both pandas Series and plain ndarrays, whereas
    # `.to_numpy()` only exists on pandas objects.
    return np.asarray(loss)

def zero_one_loss_grad_w(pred, y):
    """Per-sample 0/1 loss: 0 where the prediction equals the target, else 1.

    The comparison is positional: element ``i`` of ``pred`` is compared with
    element ``i`` of ``y``, regardless of any pandas index either may carry.

    Parameters
    ----------
    pred : array-like
        Predicted labels.
    y : array-like
        True labels.

    Returns
    -------
    numpy.ndarray
        Integer array with one 0/1 entry per sample.

    Raises
    ------
    ValueError
        If ``pred`` and ``y`` do not contain the same number of elements.
    """
    # Flatten to 1-D arrays so the comparison is positional; indexing a
    # pandas Series with `pred[i]` is label-based and picks the wrong row
    # whenever the index is not 0..n-1.
    pred_arr = np.asarray(pred).ravel()
    y_arr = np.asarray(y).ravel()

    if pred_arr.shape != y_arr.shape:
        raise ValueError(
            f"pred and y must have the same length, got {pred_arr.size} and {y_arr.size}"
        )

    return (pred_arr != y_arr).astype(int)
    

def project_W(w):
    """Euclidean projection of ``w`` onto the probability simplex.

    Returns the point ``x`` minimising ``0.5 * ||w - x||^2`` subject to
    ``0 <= x <= 1`` and ``sum(x) == 1``. Because non-negativity together with
    the unit-sum constraint already implies ``x <= 1``, this is the standard
    simplex projection, which has an exact closed form: subtract a common
    threshold ``theta`` from every entry and clamp negatives to zero. The
    threshold is found with the usual sort-and-cumulative-sum procedure, so
    no numerical solver (and no solver-failure path) is involved.

    Parameters
    ----------
    w : array-like
        1-D vector to project.

    Returns
    -------
    numpy.ndarray
        Float array of the same length as ``w``, non-negative and summing to 1.

    Raises
    ------
    ValueError
        If ``w`` is empty or not one-dimensional.
    """
    v = np.asarray(w, dtype=float)
    if v.ndim != 1 or v.size == 0:
        raise ValueError("project_W expects a non-empty 1-D vector")

    # Sort descending and, for each prefix length k, compute the threshold
    # that would make the k largest entries sum to exactly 1.
    u = np.sort(v)[::-1]
    shifted_cumsum = np.cumsum(u) - 1.0
    k = np.arange(1, v.size + 1)

    # The support size is the largest k whose k-th largest entry stays
    # positive after subtracting that prefix's threshold (k = 1 always
    # qualifies for finite input, since u[0] - (u[0] - 1) = 1 > 0).
    rho = np.flatnonzero(u - shifted_cumsum / k > 0)[-1]
    theta = shifted_cumsum[rho] / (rho + 1)

    return np.maximum(v - theta, 0.0)

def bayesian_oracle(w):
    """Placeholder oracle: currently returns its argument ``w`` unchanged."""
    return w

In [39]:
# Meta Algo
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds
from sklearn.linear_model import LogisticRegression
import sklearn.metrics

constraint_used = 'dp' # dp, eo
n_train = X_train.shape[0]
# Start from uniform sample weights over the training set.
w = np.full((n_train,), 1/n_train)

# Map the short constraint code to its fairlearn constraint class so the
# estimator is configured in exactly one place.
fairness_constraints = {'dp': DemographicParity, 'eo': EqualizedOdds}
if constraint_used not in fairness_constraints:
    # Without this check an unknown code would leave expgrad_X undefined
    # (or stale from an earlier kernel run) and fail later, far from the cause.
    raise ValueError(
        f"Invalid constraint_used variable: {constraint_used!r}. Expected 'dp' or 'eo'."
    )

expgrad_X = ExponentiatedGradient(
    LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
    constraints=fairness_constraints[constraint_used](),
    eps=0.01,
    nu=1e-6)

expgrad_X.fit(X_train, y_train, sensitive_features=sensitive_features_train, sample_weight=w)

T = 10  # number of weight-update rounds
eta = 1/np.sqrt(2*T)  # step size for the weight update
# Reset to uniform weights before the update loop.
w = np.full((n_train,), 1/n_train)
hs = []
for t in range(T):
    # Add eta times the per-sample 0/1 loss of the current model to the
    # weights, then project the result back onto the feasible weight set.
    # NOTE(review): expgrad_X is not refit inside this loop; predict() is kept
    # per-iteration to preserve behaviour in case its output is randomised.
    w += eta * zero_one_loss_grad_w(expgrad_X.predict(X_train), y_train)
    w = project_W(w)
    print(w.sum())
    # h_t = M(w_t) TODO: BAYESIAN ORACLE STEP.

1.0000000000000213
0.9999999999999757
0.9999999999999662
1.0000000000000229
1.0000000000000224
1.0000000000000135
1.0000000000000198
1.0000000000000053
1.000000000000007
0.9999999999999789
