# Demo of individual fairness testing for SenSR and reduction

We present a small scale demo for SenSR fitted on Adult data. We shall use SenSR weights fitted on Adult data. 

Let's install and load the required modules. 

In [1]:
!pip install aif360
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sensr.adult_modified import preprocess_adult_data
from sklearn import linear_model
import sensr.utils as utils
import scipy
import json
import warnings
warnings.filterwarnings("ignore")
from functools import partial
from scipy.stats import norm
import sensr.metrics as metrics

Let's load the seed for train-test split and initialization of model fitting.

In [2]:
# Load the table of seeds from disk; row `run` holds the seed pair for one experiment.
seeds = np.load('seeds.npy')
# Index of the experiment run to reproduce.
run = 0
# Column 0 is passed to preprocess_adult_data as the data seed; column 1 is used
# below only to locate the pretrained SenSR weight file.
seed_data, seed_model = seeds[run, 0], seeds[run, 1]

### Data pre-processing 

Let's load the data

In [3]:
# Build the train/test datasets for the chosen data seed.
dataset_orig_train, dataset_orig_test = preprocess_adult_data(seed = seed_data)
# Split the feature matrix at column 39: the first 39 columns are treated as
# unprotected covariates, the remaining columns as protected attributes.
x_unprotected_train, x_protected_train = dataset_orig_train.features[:, :39], dataset_orig_train.features[:, 39:]
x_unprotected_test, x_protected_test = dataset_orig_test.features[:, :39], dataset_orig_test.features[:, 39:]
# Flatten the labels to 1-D vectors.
y_train, y_test = dataset_orig_train.labels.reshape((-1,)), dataset_orig_test.labels.reshape((-1,))

We fit logistic regressions for gender and race on the other covariates to get the sensitive directions. We then extract an orthonormal basis from them. These will be used to project out the sensitive directions from the features.

In [4]:
# Fit one logistic regression per protected attribute (columns 0 and 1 of the
# protected block) on the unprotected covariates; each coefficient vector is a
# sensitive direction.
# NOTE(review): the regressions are fitted on the *test* split here, whereas
# run_sensr fits them on the training split -- confirm this is intended.
sensetive_directions = []
protected_regression = linear_model.LogisticRegression(fit_intercept = True)
for protected_column in (0, 1):
    protected_regression.fit(x_unprotected_test, x_protected_test[:, protected_column])
    sensetive_directions.append(protected_regression.coef_.reshape((-1,)))
sensetive_directions = np.array(sensetive_directions)

# Orthonormal basis (stored as rows) of the span of the sensitive directions.
sensetive_directions = scipy.linalg.orth(sensetive_directions.T).T
# Re-normalise each row once. The previous `while norm != 1` loop relied on exact
# floating-point equality and could fail to terminate when the norm settles one
# ulp away from 1.
sensetive_directions = sensetive_directions / np.linalg.norm(sensetive_directions, axis = 1, keepdims = True)

The variables are cast to proper tensor objects

In [5]:
# Integer class labels, as required by tf.one_hot below.
y_train, y_test = y_train.astype('int32'), y_test.astype('int32')
# Unprotected covariates as float32 tensors.
x_unprotected_train, x_unprotected_test = tf.cast(x_unprotected_train, dtype = tf.float32), tf.cast(x_unprotected_test, dtype = tf.float32)
# One-hot encode the labels with depth 2.
y_train, y_test = tf.one_hot(y_train, 2), tf.one_hot(y_test, 2)
# Sensitive directions as a float32 tensor, matching the dtype of the covariates.
sensetive_directions = tf.cast(sensetive_directions, dtype = tf.float32)

## SenSR

Here we present the demo for SenSR model. We provide both the options of fitting the model or using pre-trained model (training SenSR takes a while). 

### Training SenSR

The SenSR code is written in TensorFlow 1, so we load the TensorFlow 1 compatible modules from TensorFlow 2.

In [None]:
# Skip this if you want to use pretrained model
import tensorflow.compat.v1 as tf
from sensr.train_clp_adult import train_fair_nn
from sklearn.preprocessing import OneHotEncoder
def run_sensr(seed_data, seed_model):
    """
    Train a SenSR model on the Adult data and return its fitted weights.

    parameters:
        seed_data: seed passed to preprocess_adult_data for the train-test split
        seed_model: seed intended for model initialization
            NOTE(review): this value is currently not forwarded; train_fair_nn
            is called with seed=None. Confirm whether seed=seed_model was intended.

    return:
        the first value returned by train_fair_nn (the fitted weights;
        presumably in the same layout as the pretrained files under
        ./sensr/models -- confirm)
    """
    dataset_orig_train, dataset_orig_test = preprocess_adult_data(seed = seed_data)

    all_train, all_test = dataset_orig_train.features, dataset_orig_test.features
    y_train, y_test = dataset_orig_train.labels.reshape((-1,)), dataset_orig_test.labels.reshape((-1,))
    y_train, y_test = y_train.astype('int32'), y_test.astype('int32')

    # Column positions of the protected attributes, looked up once and reused.
    protected_idx = [dataset_orig_test.feature_names.index(feat) for feat in ['sex_ Male', 'race_ White']]

    # Covariates with the protected columns removed.
    x_train = np.delete(all_train, protected_idx, axis = 1)
    x_test = np.delete(all_test, protected_idx, axis = 1)

    # Protected attributes, one column per group.
    group_train = all_train[:, protected_idx]
    group_test = all_test[:, protected_idx]
    group_names = ['Gender', 'Race']

    # One-hot encode the labels. The encoder is left at its default (sparse)
    # output and densified explicitly, because the `sparse=False` keyword was
    # renamed to `sparse_output` in newer scikit-learn releases.
    one_hot = OneHotEncoder()
    one_hot.fit(y_train.reshape(-1,1))
    y_train = one_hot.transform(y_train.reshape(-1,1)).toarray()
    y_test = one_hot.transform(y_test.reshape(-1,1)).toarray()

    # One sensitive direction per protected attribute: the coefficient vector of
    # a logistic regression predicting that attribute from the covariates.
    sensitive_directions = []
    for y_protected in group_train.T:
        lr = linear_model.LogisticRegression(solver='liblinear', fit_intercept=True)
        lr.fit(x_train, y_protected)
        sensitive_directions.append(lr.coef_.flatten())

    sensitive_directions = np.array(sensitive_directions)

    tf.reset_default_graph()
    fair_info = [group_train, group_test, group_names, sensitive_directions]
    weights, train_logits, test_logits, _, variables = train_fair_nn(x_train, y_train, tf_prefix='sensr', adv_epoch_full=50, l2_attack=0.0001,
                                          adv_epoch=10, ro=0.001, adv_step=10., plot=False, fair_info=fair_info, balance_batch=True, 
                                          X_test = x_test, X_test_counter=None, y_test = y_test, lamb_init=2., 
                                          n_units=[100], l2_reg=0, epoch=20000, batch_size=1000, lr=1e-5, lambda_clp=0.,
                                          fair_start=0., counter_init=False, seed=None)

    # Return the freshly trained weights. The previous `return weight` referred
    # to a name that is not defined in this function.
    return weights
weights = run_sensr(seed_data, seed_model)

# Reloading tensorflow 2 modules 
import tensorflow as tf

### Reload pre-trained weights

Let's load the weights and biases of the corresponding run of experiment. 

In [6]:
# Read the pretrained SenSR parameters stored as JSON for this (data seed, model seed) pair.
with open(f'./sensr/models/data_{seed_data}_{seed_model}.txt', 'r') as f:
    weight = json.load(f)
# Convert every stored entry to a NumPy array; graph_sensr reads entries 0-3 of this list.
weights = [np.array(w) for w in weight]

### Building the graph 

Now we build the graph using pre-trained weights

In [7]:
# Function for building a layer with a given weight and bias
def SimpleDense(variable):
    """
    Build an affine layer from fixed parameters.

    parameters:
        variable: pair (w, b) holding the weight and the bias of the layer

    return:
        callable mapping x to tf.matmul(x, w) + b, with w and b cast to float32
    """
    raw_kernel, raw_bias = variable
    kernel = tf.cast(raw_kernel, dtype = tf.float32)
    bias = tf.cast(raw_bias, dtype = tf.float32)

    def apply_layer(x):
        return tf.matmul(x, kernel) + bias

    return apply_layer

# We use prefitted weights and biases to build the graph
def graph_sensr(x):
    """
    Forward pass of the SenSR network built from the notebook-level `weights`.

    The network is one ReLU layer (weights[0], weights[1]) followed by an
    affine output layer (weights[2], weights[3]) and a softmax.

    parameters:
        x: input tensor accepted by tf.matmul against weights[0]

    return:
        tensor of softmax outputs of the output layer
    """
    hidden_layer = SimpleDense([weights[0], weights[1]])
    output_layer = SimpleDense([weights[2], weights[3]])
    hidden = tf.nn.relu(hidden_layer(x))
    logits = output_layer(hidden)
    return tf.nn.softmax(logits)

### Gradient flow attack and hypothesis testing

We define the required function which performs gradient-flow-attack and returns ratio of perturbed loss and original loss

In [8]:
def sample_perturbation(data_point, regularizer = 100, learning_rate = 5e-2, num_steps = 200, graph = None):
    """
    Calculates ratio between perturbed loss and original loss

    Runs `num_steps` steps of gradient ascent on the input, maximizing the
    entropy loss minus `regularizer` times the squared norm of
    utils.unprotected_direction(x - x_start, sensetive_directions).

    parameters: 
        data_point: tuple of x, y
            x: tensor of shape (d, )
            y: one-hot encoded tensor of shape (2, )
        regularizer (float): regularizer constant for fair metric
        learning_rate (float): step size for gradient ascent
        num_steps (int): number of steps in gradient ascent
        graph (callable, optional): model mapping a (1, d) tensor to class
            probabilities; defaults to graph_sensr when omitted

    return:
        float; ratio of entropy losses for perturbed and original sample.
        The ratio may be non-finite (e.g. when the original loss is zero);
        callers filter such values with np.isfinite.
    """
    # Resolve the default at call time so that the notebook-level model
    # definition in effect when the attack runs is the one used.
    if graph is None:
        graph = graph_sensr

    x, y = data_point
    x = tf.reshape(x, (1, -1))
    y = tf.reshape(y, (1, -1))
    x_start = x
    for _ in range(num_steps):
        with tf.GradientTape() as g:
            # x is a plain tensor, not a tf.Variable, so it must be watched explicitly.
            g.watch(x)
            prob = graph(x)
            perturb = utils.unprotected_direction(x-x_start, sensetive_directions)
            loss = utils.EntropyLoss(y, prob)  - regularizer  * tf.norm(perturb)**2

        gradient = g.gradient(loss, x)
        # Ascent step: move x in the direction that increases the penalized loss.
        x = x + learning_rate * gradient

    return_loss = utils.EntropyLoss(y, graph(x)) / utils.EntropyLoss(y, graph(x_start))
    
    return return_loss.numpy()

For demo purposes we perform the gradient flow attack only on the first 20 test points. Readers are welcome to run it on any test points of their choosing. We create a zipped sequence of data points for the first 20 test points. 

In [9]:
# Range of test points to attack.
start = 0
end = 20
# Materialize the (x, y) pairs as a list. A bare zip object is a one-shot
# iterator, so re-running the attack cell without re-running this one would
# silently operate on an empty sample.
data_points = list(zip(x_unprotected_test[start:end], y_test[start:end]))

We now setup some experimental parameters and extract a partial function using them. 

In [10]:
# Attack hyper-parameters: fair-metric penalty, ascent step size and number of steps.
regularizer = 50
learning_rate = 1e-2
num_steps = 200
# Bind the hyper-parameters so the attack can be applied to one data point at a time.
sample_perturb = partial(sample_perturbation, regularizer = regularizer,
                         learning_rate = learning_rate, num_steps = num_steps)
# Loss ratio (perturbed / original) for every selected test point.
test_ratios = np.array([sample_perturb(point) for point in data_points])

Now we calculate the lower bound and p-value for the test.

In [11]:
# Keep only the finite loss ratios.
test_ratios = test_ratios[np.isfinite(test_ratios)]
# With no usable ratios every statistic below is NaN and the comparison
# `pval < 0.05` is False, which would print "individually fair" by accident.
if test_ratios.shape[0] == 0:
    raise ValueError('No finite loss ratios available; re-create data_points and re-run the attack.')
# One-sided lower confidence bound for the mean ratio (1.645 is the normal 0.95 quantile).
lower_bound = np.mean(test_ratios) - 1.645*np.std(test_ratios)/np.sqrt(test_ratios.shape[0])
# Standardized statistic for testing the mean ratio against the threshold 1.25.
t = (np.mean(test_ratios)-1.25)/np.std(test_ratios)
t *= np.sqrt(test_ratios.shape[0])
# Upper-tail p-value under the standard normal.
pval = 1- norm.cdf(t)
print(f'For the proposed test, lower bound is {lower_bound} and\
 p-value is {pval}.\n')
decision = 'rejected' if pval < 0.05 else 'not rejected'
# The highlight escape sequence is now closed with the same reset code used by
# the metric headers, so the styling no longer leaks into later output.
if decision == 'rejected':
    print('The test is rejected at a level 0.05 and\
 we conclude the model is not \033[1;40;47mindividually fair.\033[1;40;0m')
else:
    print('The test is not rejected at a\
 level 0.05 and we conclude the model is \033[1;40;47mindividually fair.\033[1;40;0m')
    

For the proposed test, lower bound is 1.0385039692301912 and p-value is 1.0.

The test is not rejected at a level 0.05 and we conclude the model is [1;40;47mindividually fair.


### Some fairness measures

Now we calculate some fairness measures for the fitted model. First we get the predictions on the test data points. 

In [12]:
# Class probabilities of the SenSR model on all test points.
prob = graph_sensr(x_unprotected_test)
# Predicted class = index of the larger probability, converted to a NumPy array.
y_pred = tf.argmax(prob, axis = 1)
y_pred = y_pred.numpy()
# Columns 39 and 40 are the first two columns of the protected block split off
# during pre-processing; they are used as the gender and race indicators.
gender = dataset_orig_test.features[:, 39]
race = dataset_orig_test.features[:, 40]
# True test labels as a flat vector.
labels_test = dataset_orig_test.labels.reshape((-1,))

Now we calculate several fairness measures for gender and race. 

In [13]:
# Report the group metrics of the SenSR predictions with gender as the protected attribute.
# The header is highlighted with an ANSI escape sequence and reset afterwards.
print('\033[1;40;47mMeasures for gender:\033[1;40;0m')
_ = metrics.group_metrics(labels_test, y_pred, gender, label_good=1)

# Same report with race as the protected attribute.
print('\n\033[1;40;47mMeasures for race:\033[1;40;0m')
_ = metrics.group_metrics(labels_test, y_pred, race, label_good=1)

[1;40;47mMeasures for gender:[1;40;0m
Accuracy is 0.751797
Balanced accuracy is 0.750635
Gap RMS is 0.04686660860680571
Mean absolute gap is 0.04467711097540024
Max gap is 0.05883460924293629
Average odds difference is -0.014157
Equal opportunity difference is 0.030520
Statistical parity difference is -0.142075

[1;40;47mMeasures for race:[1;40;0m
Accuracy is 0.751797
Balanced accuracy is 0.750635
Gap RMS is 0.05802546350787034
Mean absolute gap is 0.05786025174847749
Max gap is 0.06223582975669539
Average odds difference is -0.057860
Equal opportunity difference is -0.062236
Statistical parity difference is -0.099100


## Reduction

Here we present a testing demo for the reduction method. As before, we provide both options: training the model or using a pre-trained model. 

### Training reduction

In [14]:
from sklearn.linear_model import LogisticRegression
from fairlearn.reductions import ExponentiatedGradient
from fairlearn.reductions import DemographicParity, TruePositiveRateDifference, ErrorRateRatio, EqualizedOdds
from reduction.metrics import group_metrics
# Lookup from short constraint codes to the fairlearn constraint classes;
# extract_weights instantiates the entry selected by its code.
constraints = {'TPRD': TruePositiveRateDifference,
               'ERR': ErrorRateRatio,
               'DP': DemographicParity,
               'EO': EqualizedOdds}

In [None]:
# Skip this if you want to use pre-trained model
def extract_weights(run):
    """
    Train the reduction (exponentiated gradient) model and export its ensemble.

    parameters:
        run (int): row of the notebook-level `seeds` array; column 0 of that
            row is used as the data seed

    return:
        dict with keys 'ens_weights', 'coefs' and 'intercepts', each a list of
        plain Python values for the ensemble members with positive weight
    """
    data_seed = seeds[run, 0]

    dataset_orig_train, dataset_orig_test = preprocess_adult_data(seed = data_seed)

    all_train, all_test = dataset_orig_train.features, dataset_orig_test.features
    y_train = dataset_orig_train.labels.reshape((-1,)).astype('int32')
    y_test = dataset_orig_test.labels.reshape((-1,)).astype('int32')

    # Column positions of the protected attributes.
    protected_idx = [dataset_orig_test.feature_names.index(feat) for feat in ['sex_ Male', 'race_ White']]

    # Covariates without the protected columns, and the protected columns themselves.
    x_train = np.delete(all_train, protected_idx, axis = 1)
    x_test = np.delete(all_test, protected_idx, axis = 1)
    group_train = all_train[:, protected_idx]
    group_test = all_test[:, protected_idx]

    # Encode the two binary attributes as a single group id (first + 2 * second).
    group_train_cross = group_train[:,0] + group_train[:,1]*2
    group_test_cross = group_test[:,0] + group_test[:,1]*2

    ## Train reductions
    eps = 0.03
    c = 'EO'
    constraint = constraints[c]()
    classifier = LogisticRegression(solver='liblinear', fit_intercept=True, class_weight='balanced')
    mitigator = ExponentiatedGradient(classifier, constraint, eps=eps)
    mitigator.fit(x_train, y_train, sensitive_features=group_train_cross)
    y_pred_mitigated = mitigator.predict(x_test)
    print('\nFair on all test')
    _ = group_metrics(y_test, y_pred_mitigated, group_test[:,0], label_protected=0, label_good=1)

    # Collect the members of the fitted ensemble that carry positive weight.
    ens_weights, coefs, intercepts = [], [], []
    for position, label in enumerate(mitigator._weights.index):
        member_weight = mitigator._weights[label]
        if member_weight > 0:
            predictor = mitigator._predictors[position]
            coefs.append(predictor.coef_.flatten())
            intercepts.append(predictor.intercept_[0])
            ens_weights.append(member_weight)

    # Convert NumPy values to plain Python objects so the result can be serialized.
    return {
        'ens_weights': [w.tolist() for w in ens_weights],
        'coefs': [vec.tolist() for vec in coefs],
        'intercepts': [b.tolist() for b in intercepts],
    }

data = extract_weights(run)

### Loading pretrained weights

In [15]:
# Read the pretrained reduction ensemble stored as JSON for this data seed.
with open(f'./reduction/models/data_{seed_data}.txt', 'r') as f:
    data = json.load(f)

### Building the graph

We now build the graph with extracted weights, intercepts and coefficients 

In [16]:
# Unpack the stored ensemble: per-member coefficients, intercepts and mixture weights.
coef = data['coefs']
intercept = data['intercepts']
weight = data['ens_weights']
# Cast every entry to a float32 tensor for use in graph_reduction.
coefs = [tf.cast(c, dtype = tf.float32) for c in coef]
intercepts = [tf.cast(c, dtype = tf.float32) for c in intercept]
# NOTE(review): this rebinds the notebook-level name `weights`, which graph_sensr
# also reads; the SenSR weights must be reloaded before graph_sensr is called again.
weights = [tf.cast(c, dtype = tf.float32) for c in weight]

def graph_reduction(x):
    """
    Forward pass of the reduction model: a weighted mixture of logistic regressions.

    Reads the notebook-level lists `coefs`, `intercepts` and `weights`
    (one entry per ensemble member).

    parameters:
        x: 2-D tensor with one row per data point

    return:
        tensor with two columns, [1 - p, p], where p is the weighted sum of the
        members' sigmoid outputs
    """
    n, _ = x.shape
    prob = tf.zeros([n, 1], dtype = tf.float32)
    for coef, intercept, weight in zip(coefs, intercepts, weights):
        coef = tf.reshape(coef, [-1, 1])
        model_logit = x @ coef + intercept
        # tf.math.sigmoid is numerically stable; exp(z) / (1 + exp(z)) overflows
        # to inf / inf = NaN for large positive logits.
        model_prob = tf.math.sigmoid(model_logit)
        prob += model_prob * weight

    return tf.concat([1-prob, prob], axis = 1)

### Gradient flow attack and hypothesis testing

We now perform gradient flow attack on reduction model. As before, we first define the function for gradient flow attack

In [17]:
def sample_perturbation(data_point, regularizer = 100, learning_rate = 5e-2, num_steps = 200, graph = None):
    """
    Calculates ratio between perturbed loss and original loss

    Runs `num_steps` steps of gradient ascent on the input, maximizing the
    entropy loss minus `regularizer` times the squared norm of
    utils.unprotected_direction(x - x_start, sensetive_directions).

    parameters: 
        data_point: tuple of x, y
            x: tensor of shape (d, )
            y: one-hot encoded tensor of shape (2, )
        regularizer (float): regularizer constant for fair metric
        learning_rate (float): step size for gradient ascent
        num_steps (int): number of steps in gradient ascent
        graph (callable, optional): model mapping a (1, d) tensor to class
            probabilities; defaults to graph_reduction when omitted

    return:
        float; ratio of entropy losses for perturbed and original sample.
        The ratio may be non-finite (e.g. when the original loss is zero);
        callers filter such values with np.isfinite.
    """
    # Resolve the default at call time so that the notebook-level model
    # definition in effect when the attack runs is the one used.
    if graph is None:
        graph = graph_reduction

    x, y = data_point
    x = tf.reshape(x, (1, -1))
    y = tf.reshape(y, (1, -1))
    x_start = x
    for _ in range(num_steps):
        with tf.GradientTape() as g:
            # x is a plain tensor, not a tf.Variable, so it must be watched explicitly.
            g.watch(x)
            prob = graph(x)
            perturb = utils.unprotected_direction(x-x_start, sensetive_directions)
            loss = utils.EntropyLoss(y, prob)  - regularizer  * tf.norm(perturb)**2

        gradient = g.gradient(loss, x)
        # Ascent step: move x in the direction that increases the penalized loss.
        x = x + learning_rate * gradient

    return_loss = utils.EntropyLoss(y, graph(x)) / utils.EntropyLoss(y, graph(x_start))
    
    return return_loss.numpy()

For demonstration purposes we perform the attack on the first 20 sample points.

In [18]:
# Range of test points to attack.
start = 0
end = 20
# Materialize the (x, y) pairs as a list. A bare zip object is a one-shot
# iterator, so re-running the attack cell without re-running this one would
# silently operate on an empty sample.
data_points = list(zip(x_unprotected_test[start:end], y_test[start:end]))

We set up the parameters for the gradient flow attack

In [19]:
# Attack hyper-parameters: fair-metric penalty, ascent step size and number of steps.
regularizer = 50
learning_rate = 1e-2
num_steps = 200
# Bind the hyper-parameters so the attack can be mapped over individual data points.
sample_perturb = partial(sample_perturbation, regularizer = regularizer, learning_rate = \
                        learning_rate, num_steps = num_steps)

The gradient flow attack is performed on the selected sample points, and the lower bound, p-value and test decision are reported.

In [20]:
# Loss ratio (perturbed / original) for every selected test point.
test_ratio = np.array(list(map(sample_perturb, data_points)))
# Keep only the finite loss ratios.
test_ratio = test_ratio[np.isfinite(test_ratio)]
# With no usable ratios (for instance when `data_points` is an already-consumed
# iterator) every statistic below is NaN and the comparison `pval < 0.05` is
# False, which would print "individually fair" by accident.
if test_ratio.shape[0] == 0:
    raise ValueError('No finite loss ratios available; re-create data_points and re-run the attack.')
# One-sided lower confidence bound for the mean ratio (1.645 is the normal 0.95 quantile).
lower_bound = np.mean(test_ratio) - 1.645*np.std(test_ratio)/np.sqrt(test_ratio.shape[0])
# Standardized statistic for testing the mean ratio against the threshold 1.25.
t = (np.mean(test_ratio)-1.25)/np.std(test_ratio)
t *= np.sqrt(test_ratio.shape[0])
# Upper-tail p-value under the standard normal.
pval = 1- norm.cdf(t)
print(f'For the proposed test, lower bound is {lower_bound} and\
 p-value is {pval}.\n')
decision = 'rejected' if pval < 0.05 else 'not rejected'
if decision == 'rejected':
    print('The test is rejected at a level 0.05 and\
 we conclude the model is not individually fair.')
else:
    print('The test is not rejected at a\
 level 0.05 and we conclude the model is individually fair.')
    

For the proposed test, lower bound is 2.4446973404799226 and p-value is 6.068317737195628e-09.

The test is rejected at a level 0.05 and we conclude the model is not individually fair.


### Some fairness measures

Now we calculate some fairness measures for the fitted model. First we get the predictions on the test data points. 

In [21]:
# Class probabilities of the reduction model on all test points.
prob = graph_reduction(x_unprotected_test)
# Predicted class = index of the larger probability, converted to a NumPy array.
y_pred = tf.argmax(prob, axis = 1)
y_pred = y_pred.numpy()
# Columns 39 and 40 are the first two columns of the protected block split off
# during pre-processing; they are used as the gender and race indicators.
gender = dataset_orig_test.features[:, 39]
race = dataset_orig_test.features[:, 40]
# True test labels as a flat vector.
labels_test = dataset_orig_test.labels.reshape((-1,))

# Report the group metrics with gender as the protected attribute.
print('\033[1;40;47mMeasures for gender:\033[1;40;0m')
_ = metrics.group_metrics(labels_test, y_pred, gender, label_good=1)

# Same report with race as the protected attribute.
print('\n\033[1;40;47mMeasures for race:\033[1;40;0m')
_ = metrics.group_metrics(labels_test, y_pred, race, label_good=1)

[1;40;47mMeasures for gender:[1;40;0m
Accuracy is 0.819790
Balanced accuracy is 0.796751
Gap RMS is 0.05525612347933615
Mean absolute gap is 0.05459925122749115
Max gap is 0.06309401122180236
Average odds difference is 0.008495
Equal opportunity difference is 0.063094
Statistical parity difference is -0.145235

[1;40;47mMeasures for race:[1;40;0m
Accuracy is 0.819790
Balanced accuracy is 0.796751
Gap RMS is 0.039352036125304454
Mean absolute gap is 0.03899846673206153
Max gap is 0.04426176795425629
Average odds difference is -0.038998
Equal opportunity difference is -0.044262
Statistical parity difference is -0.087918
