In [147]:
# Import Dataset
# Reads the train/val/test CSVs for the selected dataset and derives the
# labels, feature frames, per-group test subsets, group index lists and the
# human-readable sensitive-feature series used by the cells below.
dataset_used = 'compas'

# Each branch only loads its own splits and names the dataset-specific
# columns; the preparation that follows is shared by both datasets.
if dataset_used == 'compas':
    compas_train = pd.read_csv('./../../data/compas_train.csv')
    compas_val = pd.read_csv('./../../data/compas_val.csv')
    compas_test = pd.read_csv('./../../data/compas_test.csv')

    train_frame, test_frame = compas_train, compas_test
    label_column = 'two_year_recid'
    sensitive_column = 'race'
    group_names = {0: 'African-American', 1: 'Caucasian'}

elif dataset_used == 'adult':
    adult_train = pd.read_csv('./../../data/adult_train.csv')
    adult_val = pd.read_csv('./../../data/adult_val.csv')
    adult_test = pd.read_csv('./../../data/adult_test.csv')

    train_frame, test_frame = adult_train, adult_test
    label_column = 'Income Binary'
    sensitive_column = 'sex'
    group_names = {0: 'Female', 1: 'Male'}

else:
    # Fail loudly: merely printing would let later cells run against
    # undefined (or stale, from a previous run) variables.
    raise ValueError(
        "Invalid dataset_used variable: %r (expected 'compas' or 'adult')." % (dataset_used,))

# pop() removes the label column from the frame in place, so X_train / X_test
# are the loaded frames minus the label (the sensitive column stays in as a
# feature).
y_train = train_frame.pop(label_column)
y_test = test_frame.pop(label_column)
X_train = train_frame
X_test = test_frame

# Test rows split by the binary-coded sensitive attribute.
X_test_a0 = X_test[X_test[sensitive_column].eq(0)]
X_test_a1 = X_test[X_test[sensitive_column].eq(1)]

# 'a0' / 'a1' hold the index labels of the training rows in each group and
# 'all' holds the raw group code of every training row. The frames come
# straight from read_csv without an index column, so these labels coincide
# with row positions.
a_indices = dict()
a_indices['a0'] = X_train.index[X_train[sensitive_column].eq(0)].tolist()
a_indices['a1'] = X_train.index[X_train[sensitive_column].eq(1)].tolist()
a_indices['all'] = X_train[sensitive_column].tolist()

# Sensitive features with the 0/1 codes replaced by readable group names;
# any other value is left as it is.
sensitive_features_train = X_train[sensitive_column].replace(group_names)
sensitive_features_test = X_test[sensitive_column].replace(group_names)

In [148]:
# Set Hyperparameters (M, B, T, gamma_1, gamma_2)
# T, gamma_1 and delta_1 drive the bucket construction and the main loop in
# the cells below; gamma_2, delta_2 and epsilon are not used in the cells
# shown here.
card_A = 2 # Cardinality of A (the dataset cell encodes two groups, 0 and 1)
nu = 0.01 # Original author's note: 0.001 is too costly to run (T scales with 1/nu**2)
M = 1
B = M # Amount added to the chosen lambda entry on every main-loop step
# Number of main-loop steps. This is a float; consumers cast it with int(T).
# NOTE(review): T is sized from X_test while delta_1 uses X_train - confirm
# that this is intended.
T = len(X_test)/(nu ** 2)
gamma_1 = nu/B # Growth ratio (1 + gamma_1) between consecutive weight buckets
gamma_2 = nu/B
delta_1 = (2 * len(X_train)) / gamma_1 # 1/delta_1 is the smallest bucket edge
delta_2 = (2 * card_A) / gamma_2
epsilon = 0.1

In [150]:
### Mock t_weights: uniform stand-in for the weights the Meta-Algo should supply ###
# One entry per training row, each equal to 1 / (number of training rows).
t_weights = np.full(len(X_train), 1 / len(X_train))
print(t_weights, len(t_weights), sep='\n')

[0.00023685 0.00023685 0.00023685 ... 0.00023685 0.00023685 0.00023685]
4222


In [151]:
def discretize_w(w, buckets):
    """Round every weight up to the upper edge of the first bucket containing it.

    The list is modified in place and also returned.

    Args:
        w: Mutable sequence of weights.
        buckets: Iterable of ``(lower, upper)`` pairs, both edges inclusive.

    Returns:
        The same ``w`` object. Entries that fall inside a bucket are replaced
        by that bucket's upper edge; entries that match no bucket are left
        unchanged.
    """
    for i, w_i in enumerate(w):
        for b in buckets:
            if b[0] <= w_i <= b[1]:
                w[i] = b[1]
                # Stop at the first match. Adjacent buckets share an edge, so
                # without this a weight sitting exactly on that edge would
                # also match the next bucket and be pushed up one bucket too
                # far (and every weight would scan all remaining buckets).
                break

    return w

def generate_w():
    """Return a random weight vector with one entry per training row.

    Draws ``len(X_train)`` values with ``ran.random()`` and divides each by
    their total, so the entries sum to 1 up to floating-point rounding.
    Reads the global ``X_train``; ``ran`` is presumably the standard
    ``random`` module imported elsewhere in the notebook.
    """
    draws = []
    for _ in range(len(X_train)):
        draws.append(ran.random())

    total = sum(draws)
    return [value / total for value in draws]

In [152]:
# Initialize N(gamma_1, W)
# Geometric grid of (lower, upper) weight buckets: the first bucket is
# [0, 1/delta_1] and each following one is (1 + gamma_1) times wider in scale
# than its predecessor, for ceil(log_{1 + gamma_1}(delta_1)) further buckets.
gamma_1_num_buckets = np.ceil(math.log(delta_1, 1 + gamma_1))
gamma_1_buckets = [(0, 1/delta_1)]
for i in range(int(gamma_1_num_buckets)):
    # Consecutive buckets share an edge: this upper bound is the next lower bound.
    bucket_lower = ((1 + gamma_1) ** i) * (1/delta_1)
    bucket_upper = ((1 + gamma_1) ** (i + 1)) * (1/delta_1)
    gamma_1_buckets += [(bucket_lower, bucket_upper)]

In [160]:
from collections import defaultdict
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds
from sklearn.linear_model import LogisticRegression

def best_response_lambda(h):
    """Mock best response: pick a random group ordering and a random weight vector.

    ``h`` is accepted for interface compatibility but is not consulted; the
    returned key is independent of the hypothesis.

    Args:
        h: Current hypothesis (unused).

    Returns:
        A tuple ``(a, a_prime, w)`` where ``(a, a_prime)`` is ``('a0', 'a1')``
        or ``('a1', 'a0')`` with equal probability and ``w`` is a tuple of
        weights from ``generate_w()`` discretized onto ``gamma_1_buckets``.
    """
    # The coin flip happens before the weights are drawn so that the sequence
    # of random draws is the same as before.
    if ran.randint(0, 1) == 0:
        a, a_prime = 'a0', 'a1'
    else:
        a, a_prime = 'a1', 'a0'

    return (a, a_prime, tuple(discretize_w(generate_w(), gamma_1_buckets)))
    
def test_lambda_dict():
    """Build a mock lambda dictionary for testing.

    Creates 50 keys ``('a0', 'a1', w)`` followed by 50 keys
    ``('a1', 'a0', w)``, each with a freshly generated, discretized weight
    tuple ``w`` and the value 1. Missing keys default to 0 because the result
    is a ``defaultdict(int)``.
    """
    lambda_dict = defaultdict(int)

    for a, a_prime in (('a0', 'a1'), ('a1', 'a0')):
        for _ in range(50):
            w = tuple(discretize_w(generate_w(), gamma_1_buckets))
            lambda_dict[(a, a_prime, w)] = 1

    return lambda_dict

# Seed the global lambda_dict with mock entries; the main loop later adds B to
# the entry selected at every step.
lambda_dict = test_lambda_dict()

# L_i = c^1_i(\lambda) - c^0_i(\lambda)
def L_i(i, y_i, t_weights, lambda_dict, a_indices):
    """Cost of predicting 1 for sample ``i`` minus the cost of predicting 0."""
    cost_predict_one = c_1_i(i, y_i, t_weights, lambda_dict, a_indices)
    cost_predict_zero = c_0_i(i, y_i, t_weights, lambda_dict)
    return cost_predict_one - cost_predict_zero

def zero_one_loss(y_pred, y_true):
    """Return 0 when the prediction equals the true label, otherwise 1."""
    return 0 if y_pred == y_true else 1

def c_1_i(i, y_i, t_weights, lambda_dict, a_indices):
    """Cost of predicting 1 for sample ``i``.

    Sum of the weighted 0-1 loss of predicting 1 against ``y_i`` and the
    ``delta_i`` term for the sample.
    """
    weighted_loss = zero_one_loss(1, y_i) * t_weights[i]
    fairness_term = delta_i(i, t_weights, lambda_dict, a_indices)
    return weighted_loss + fairness_term
    
def c_0_i(i, y_i, t_weights, lambda_dict):
    """Cost of predicting 0 for sample ``i``: its weighted 0-1 loss against ``y_i``.

    ``lambda_dict`` is accepted for symmetry with ``c_1_i`` but is not used.
    """
    loss = zero_one_loss(0, y_i)
    return loss * t_weights[i]

def delta_i(i, t_weights, lambda_dict, a_indices):
    """Lambda-dependent term added to the cost of predicting 1 for sample ``i``.

    The result is ``sum(sign * lambda * quotient)`` over the keys of
    ``lambda_dict``, where ``quotient`` is sample ``i``'s share of the total
    weight of its own group. A key ``(a, a', w)`` contributes with a positive
    sign when ``a`` is the sample's group and with a negative sign when ``a``
    is the other group.

    Args:
        i: Position of the sample in ``t_weights`` and ``a_indices['all']``.
        t_weights: NumPy array of per-sample weights (it is indexed with a
            list of indices, which a plain Python list does not support).
        lambda_dict: Mapping ``(a, a', w) -> lambda``. Keys that are absent
            count as 0.
        a_indices: Dict with ``'a0'`` / ``'a1'`` (lists of sample indices per
            group) and ``'all'`` (group code per sample; 0 means ``'a0'``,
            anything else ``'a1'``).

    Returns:
        The accumulated term; 0 when no key involves the sample's group.
    """
    # Group of sample i (a_i) and the opposite group (a_p).
    if a_indices['all'][i] == 0:
        a_i, a_p = 'a0', 'a1'
    else:
        a_i, a_p = 'a1', 'a0'

    # Sample weight relative to the total weight of its own group.
    quotient = t_weights[i] / t_weights[a_indices[a_i]].sum()

    # Every stored key contributes its own signed value exactly once. Adding
    # the pair difference (lambda[a, a', w] - lambda[a', a, w]) once per key
    # would count a pair twice whenever both orientations are stored, making
    # the result depend on whether a zero-valued key happens to be present.
    # Keys whose first element is neither group contribute nothing.
    final_sum = 0
    for tup, lam in lambda_dict.items():
        if tup[0] == a_i:
            final_sum += lam * quotient
        elif tup[0] == a_p:
            final_sum -= lam * quotient

    return final_sum
    
def weighted_classification(X_train, y_train, t_weights, sensitive_features_train, a_indices, 
                            lambda_dict, constraint_used='dp', eta=1/np.sqrt(2*T)):
    """Fit a fairness-constrained classifier on per-sample weights derived from L_i.

    Each training sample ``i`` receives the weight ``eta * L_i(i, ...) + 0.5``.
    A fairlearn ``ExponentiatedGradient`` reduction wrapping a class-balanced
    logistic regression is then fitted with those weights passed as
    ``sample_weight``.

    Args:
        X_train: Training features.
        y_train: Training labels, in the same row order as ``X_train``.
        t_weights: Per-sample weights forwarded to ``L_i``.
        sensitive_features_train: Sensitive feature per training row.
        a_indices: Group index dictionary forwarded to ``L_i``.
        lambda_dict: Lambda dictionary forwarded to ``L_i``.
        constraint_used: ``'dp'`` for DemographicParity or ``'eo'`` for
            EqualizedOdds.
        eta: Scale applied to ``L_i``. The default is computed once, from the
            global ``T``, when this function is defined.

    Returns:
        The fitted ``ExponentiatedGradient`` estimator.

    Raises:
        ValueError: If ``constraint_used`` is neither ``'dp'`` nor ``'eo'``.
    """
    constraint_classes = {'dp': DemographicParity, 'eo': EqualizedOdds}
    # Validate before the (expensive) weight computation so an unsupported
    # value fails immediately with a clear message.
    if constraint_used not in constraint_classes:
        raise ValueError(
            "Unknown constraint_used %r; expected 'dp' or 'eo'." % (constraint_used,))

    # Learning becomes a weighted classification problem, dependent on L_i.
    # Labels are read by position, matching how t_weights and
    # a_indices['all'] are indexed inside L_i.
    labels = np.asarray(y_train)
    weights = [
        eta * L_i(i, labels[i], t_weights, lambda_dict, a_indices) + 0.5
        for i in range(len(X_train))
    ]

    expgrad_X = ExponentiatedGradient(
        LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced'),
        constraints=constraint_classes[constraint_used](),
        eps=0.01,
        nu=1e-6)

    expgrad_X.fit(X_train, y_train, sensitive_features=sensitive_features_train, sample_weight=weights)
    return expgrad_X

In [161]:
### MAIN ALGORITHM ###
# On time step t...
# Repeats int(T) times: obtain a lambda key in response to the latest
# hypothesis, raise that entry of lambda_dict by B, then refit the weighted
# classifier against the updated lambda_dict and record it.

hypotheses = []
h_1 = 0  # placeholder for the initial hypothesis (no model has been fitted yet)
hypotheses.append(h_1)
for t in range(int(T)):
    # Respond to the most recent hypothesis rather than to a separate global
    # name that this cell never assigns.
    lambda_t = best_response_lambda(hypotheses[-1])
    lambda_dict[lambda_t] += B
    h_t = weighted_classification(X_train, y_train, t_weights, 
                                        sensitive_features_train, a_indices, lambda_dict)
    hypotheses.append(h_t)