In [1]:
from utils import *
from attacks import *
from train import *
from tempeh.configurations import datasets

Using TensorFlow backend.


In this notebook, we walk through some of the attacks presented in the paper.

# Data generation  

We get the COMPAS data from tempeh  

In [2]:
# Instantiate the COMPAS dataset registered in tempeh's `datasets` mapping.
compas_dataset = datasets['compas']()

# Each getter yields a (train, test) pair: features are requested as DataFrames,
# labels and the 'race' sensitive attribute as Series.
features_split = compas_dataset.get_X(format=pd.DataFrame)
X_train, X_test = features_split

labels_split = compas_dataset.get_y(format=pd.Series)
y_train, y_test = labels_split

race_split = compas_dataset.get_sensitive_features('race', format=pd.Series)
sensitive_features_train, sensitive_features_test = race_split

In [3]:
# Convert the pandas objects to plain NumPy arrays.
x_train = X_train.to_numpy()
x_test = X_test.to_numpy()

# y_train / y_test are rebound in this cell, so np.asarray is used instead of
# .to_numpy(): it accepts both a Series (first run) and an ndarray (re-run),
# which keeps the cell safe to execute more than once.
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

# Integer-encode the sensitive attribute with ONE vocabulary shared by both
# splits. np.unique(..., return_inverse=True) assigns codes by rank among the
# values it is given; encoding each split on its own would therefore shift the
# codes whenever a group is missing from one split, and g_train / g_test would
# silently disagree about which integer means which group. When both splits
# contain every group the result is identical to per-split encoding.
g_train_raw = sensitive_features_train.to_numpy()
g_test_raw = sensitive_features_test.to_numpy()
group_names, g_codes = np.unique(
    np.concatenate((g_train_raw, g_test_raw)), return_inverse=True
)
g_codes = g_codes.reshape(-1)  # guarantee a flat vector of codes

# Split the shared codes back into the two partitions;
# group_names[code] recovers the original label.
n_train_groups = len(g_train_raw)
g_train = g_codes[:n_train_groups]
g_test = g_codes[n_train_groups:]

In [4]:
# Pool the train and test partitions back into single arrays: feature rows are
# stacked, label and group vectors are flattened and joined end to end.
X = np.vstack((x_train, x_test))
Y = np.concatenate((y_train, y_test), axis=None)
G = np.concatenate((g_train, g_test), axis=None)

# G is included in X: the group code is appended as the last feature column.
X = np.hstack((X, G[:, np.newaxis]))

We use an SVM with an RBF kernel to generate the data: 60% of the points are kept as clean data and the remaining 40% are left out as hard examples.  
$|D_c|:|D_{test}|:|D_k| = 4:1:1$

In [5]:
# Split settings forwarded to generate_dataset; 'rbf' is the kernel argument.
split_config = dict(
    clean_ratio=0.6,
    attacker_split=1.0 / 6,
    test_split=0.2,
    random_seed=2,
)
data = generate_dataset(X, Y, G, 'rbf', **split_config)

In [6]:
# Training split
X_TRAIN, Y_TRAIN, G_TRAIN = data['x_train'], data['y_train'], data['g_train']

# Test split
X_TEST, Y_TEST, G_TEST = data['x_test'], data['y_test'], data['g_test']

# Hard examples
X_NOISE, Y_NOISE, G_NOISE = data['x_noise'], data['y_noise'], data['g_noise']

# Attacker set: the attacker split followed by the hard examples.
# Features are joined row-wise; labels and groups are flattened and joined.
X_ATTACK = np.concatenate([data['x_attacker'], X_NOISE])
Y_ATTACK = np.concatenate((data['y_attacker'], Y_NOISE), axis=None)
G_ATTACK = np.concatenate((data['g_attacker'], G_NOISE), axis=None)

In [7]:
# Both bundles use the same three keys so they can be passed to the attack
# functions interchangeably: one holds the training split, the other the
# (augmented) attacker set.
datatrain = dict(x_train=X_TRAIN, y_train=Y_TRAIN, g_train=G_TRAIN)
attacker_set = dict(x_train=X_ATTACK, y_train=Y_ATTACK, g_train=G_ATTACK)

In [8]:
# Fraction of poisoning data handed to every attack below.
epsilon = 0.1

# Base learner shared by all training calls in this notebook.
base_model = LogisticRegression(solver='lbfgs', max_iter=2000)

# Random sampling attack

In [9]:
# Run the random-sampling attack, then unpack its result into the
# features / labels / groups used for training in the following cells.
poisoned_split = uniform_sampling(datatrain, attacker_set, epsilon, flip=False)
X_POI, Y_POI, G_POI = poisoned_split

### Unconstrained model

In [10]:
# Train the baseline without any fairness argument on the X_POI / Y_POI
# produced by the preceding attack cell. The returned object is later called
# directly on a feature array (unconstrained_model(X_TEST)) to get predictions.
unconstrained_model = train_unconstraind_model(base_model, X_POI, Y_POI)

In [11]:
# Predict on the test features, then report accuracy and the largest value
# returned by EO as the fairness gap.
pred_unconstrained = unconstrained_model(X_TEST)

acc_unconstrained = accuracy(Y_TEST, pred_unconstrained)
print("Accuracy:", acc_unconstrained)

eo_unconstrained = EO(G_TEST, pred_unconstrained, Y_TEST)
print("Fairness gap:", max(eo_unconstrained))

Accuracy: 0.9337121212121212
Fairness gap: 0.2670937195376689


### Fair model by Reductions  
We use a fairness gap of $\delta=0.01$ throughout.

In [12]:
# Fairness gap passed to the reductions trainer; reused by later cells.
gap = 0.01

# Build the equalized-odds constraint object first, then train with it.
eo_constraint = EqualizedOdds()
fair_model = train_fair_model_reduction(
    base_model, X_POI, Y_POI, G_POI, eo_constraint, gap
)

In [13]:
# Predictions are wrapped in an ndarray before scoring.
pred_fair = np.array(fair_model(X_TEST))

acc_fair = accuracy(Y_TEST, pred_fair)
print("Accuracy:", acc_fair)

# Fairness gap measured on test data: the largest value returned by EO.
eo_fair = EO(G_TEST, pred_fair, Y_TEST)
print("Fairness gap:", max(eo_fair))

Accuracy: 0.9278107539276724
Fairness gap: 0.0362830839917514


### Fair model by Post processing  
For post-processing, exact fairness ($\delta=0$) is satisfied on the training data.

In [14]:
# Labels and group codes are cast to int before being handed to the
# post-processing trainer.
labels_int = Y_POI.astype(int)
groups_int = G_POI.astype(int)
fair_model_PP = train_fair_model_post_processing(
    base_model, X_POI, labels_int, groups_int, 'equalized_odds'
)

In [15]:
# The post-processed model is called with the test features and the
# int-cast test group codes.
test_groups_int = G_TEST.astype(int)
pred_fair_PP = np.array(fair_model_PP(X_TEST, test_groups_int))

acc_fair_PP = accuracy(Y_TEST, pred_fair_PP)
print("Accuracy:", acc_fair_PP)

eo_fair_PP = EO(G_TEST, pred_fair_PP, Y_TEST)
print("Fairness gap:", max(eo_fair_PP))

Accuracy: 0.8346078826582257
Fairness gap: 0.04908101670507625


# Algorithm 1  
For algorithm 1 we use $L = \lambda/\epsilon = 1$

In [16]:
# Settings for algorithm1, collected in one place so they are easy to scan.
algorithm1_config = dict(L=1, num_iters=2000, r=1, lr=0.001, flip=False)

# Run the attack; its result replaces X_POI / Y_POI / G_POI from the
# previous section.
poisoned_split = algorithm1(datatrain, attacker_set, epsilon, **algorithm1_config)
X_POI, Y_POI, G_POI = poisoned_split

In [17]:
# Retrain the baseline (no fairness argument) on the X_POI / Y_POI produced by
# the preceding attack cell; this rebinds unconstrained_model.
unconstrained_model = train_unconstraind_model(base_model, X_POI, Y_POI)

In [18]:
# Predict on the test features, then report accuracy and the largest value
# returned by EO as the fairness gap.
pred_unconstrained = unconstrained_model(X_TEST)

acc_unconstrained = accuracy(Y_TEST, pred_unconstrained)
print("Accuracy:", acc_unconstrained)

eo_unconstrained = EO(G_TEST, pred_unconstrained, Y_TEST)
print("Fairness gap:", max(eo_unconstrained))

Accuracy: 0.8863636363636364
Fairness gap: 0.4091352179362206


In [19]:
# Build the equalized-odds constraint object first, then train with the
# fairness gap `gap` defined earlier in the notebook.
eo_constraint = EqualizedOdds()
fair_model = train_fair_model_reduction(
    base_model, X_POI, Y_POI, G_POI, eo_constraint, gap
)

In [20]:
# Predictions are wrapped in an ndarray before scoring.
pred_fair = np.array(fair_model(X_TEST))

acc_fair = accuracy(Y_TEST, pred_fair)
print("Accuracy:", acc_fair)

# Fairness gap: the largest value returned by EO on the test split.
eo_fair = EO(G_TEST, pred_fair, Y_TEST)
print("Fairness gap:", max(eo_fair))

Accuracy: 0.7798028037736952
Fairness gap: 0.06903169994753355


In [21]:
# Labels and group codes are cast to int before being handed to the
# post-processing trainer.
labels_int = Y_POI.astype(int)
groups_int = G_POI.astype(int)
fair_model_PP = train_fair_model_post_processing(
    base_model, X_POI, labels_int, groups_int, 'equalized_odds'
)

In [22]:
# The post-processed model is called with the test features and the
# int-cast test group codes.
test_groups_int = G_TEST.astype(int)
pred_fair_PP = np.array(fair_model_PP(X_TEST, test_groups_int))

acc_fair_PP = accuracy(Y_TEST, pred_fair_PP)
print("Accuracy:", acc_fair_PP)

eo_fair_PP = EO(G_TEST, pred_fair_PP, Y_TEST)
print("Fairness gap:", max(eo_fair_PP))

Accuracy: 0.7266146685354307
Fairness gap: 0.10639372848892825


# Algorithm 2  
For algorithm 2 we use $L = \lambda/\epsilon = 100$

In [23]:
# Settings for algorithm2, collected in one place so they are easy to scan.
algorithm2_config = dict(L=100, num_iters=1000, flip=False)

# Run the attack; its result replaces X_POI / Y_POI / G_POI from the
# previous section.
poisoned_split = algorithm2(datatrain, attacker_set, epsilon, **algorithm2_config)
X_POI, Y_POI, G_POI = poisoned_split

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


In [24]:
# Retrain the baseline (no fairness argument) on the X_POI / Y_POI produced by
# the preceding attack cell; this rebinds unconstrained_model.
unconstrained_model = train_unconstraind_model(base_model, X_POI, Y_POI)

In [25]:
# Predict on the test features, then report accuracy and the largest value
# returned by EO as the fairness gap.
pred_unconstrained = unconstrained_model(X_TEST)

acc_unconstrained = accuracy(Y_TEST, pred_unconstrained)
print("Accuracy:", acc_unconstrained)

eo_unconstrained = EO(G_TEST, pred_unconstrained, Y_TEST)
print("Fairness gap:", max(eo_unconstrained))

Accuracy: 0.9166666666666666
Fairness gap: 0.125


In [26]:
# Build the equalized-odds constraint object first, then train with the
# fairness gap `gap` defined earlier in the notebook.
eo_constraint = EqualizedOdds()
fair_model = train_fair_model_reduction(
    base_model, X_POI, Y_POI, G_POI, eo_constraint, gap
)

In [27]:
# Predictions are wrapped in an ndarray before scoring.
pred_fair = np.array(fair_model(X_TEST))

acc_fair = accuracy(Y_TEST, pred_fair)
print("Accuracy:", acc_fair)

# Fairness gap: the largest value returned by EO on the test split.
eo_fair = EO(G_TEST, pred_fair, Y_TEST)
print("Fairness gap:", max(eo_fair))

Accuracy: 0.7473789269733544
Fairness gap: 0.3348137543433376


In [28]:
# Labels and group codes are cast to int before being handed to the
# post-processing trainer.
labels_int = Y_POI.astype(int)
groups_int = G_POI.astype(int)
fair_model_PP = train_fair_model_post_processing(
    base_model, X_POI, labels_int, groups_int, 'equalized_odds'
)

In [29]:
# The post-processed model is called with the test features and the
# int-cast test group codes.
test_groups_int = G_TEST.astype(int)
pred_fair_PP = np.array(fair_model_PP(X_TEST, test_groups_int))

acc_fair_PP = accuracy(Y_TEST, pred_fair_PP)
print("Accuracy:", acc_fair_PP)

eo_fair_PP = EO(G_TEST, pred_fair_PP, Y_TEST)
print("Fairness gap:", max(eo_fair_PP))

Accuracy: 0.7057768399396952
Fairness gap: 0.5559770754491261
