# Demo of individual fairness testing for SenSR fitted on Adult data

We present a small-scale demo of individual fairness testing for a SenSR model, using pre-trained SenSR weights fitted on the Adult data. 

Let's install and load the required modules. 

In [1]:
!pip install aif360
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sensr.adult_modified import preprocess_adult_data
from sklearn import linear_model
import sensr.utils as utils
import scipy
import json
import warnings
warnings.filterwarnings("ignore")
from functools import partial
from scipy.stats import norm
import sensr.metrics as metrics

Let's load the seeds for the train-test split and for the initialization of model fitting.

In [2]:
# Index of the experiment run to reproduce; it selects one row of the seed table.
run = 0
seeds = np.load('seeds.npy')
# Column 0 is used as the data (train-test split) seed, column 1 as the model seed.
seed_data = seeds[run, 0]
seed_model = seeds[run, 1]

### Data pre-processing 

Let's load the data

In [3]:
# Column slices into the feature matrix: the first 39 columns are used as the
# unprotected covariates and the remaining columns as the protected attributes.
unprotected_cols = slice(None, 39)
protected_cols = slice(39, None)

dataset_orig_train, dataset_orig_test = preprocess_adult_data(seed = seed_data)

train_features = dataset_orig_train.features
test_features = dataset_orig_test.features
x_unprotected_train = train_features[:, unprotected_cols]
x_protected_train = train_features[:, protected_cols]
x_unprotected_test = test_features[:, unprotected_cols]
x_protected_test = test_features[:, protected_cols]

# Flatten the labels into 1-D vectors.
y_train = dataset_orig_train.labels.reshape((-1,))
y_test = dataset_orig_test.labels.reshape((-1,))

We fit logistic regressions for gender and race on the other covariates to get the sensitive directions. We then extract an orthonormal basis from them. This basis will be used to project out the sensitive directions from the features.

In [4]:
# Learn one sensitive direction per protected attribute: the coefficient vector
# of a logistic regression that predicts the attribute from the unprotected
# covariates.
# NOTE(review): the regressions are fitted on the *test* split -- confirm that
# this is intended rather than the train split.
sensetive_directions = []
for protected_column in range(2):
    protected_regression = linear_model.LogisticRegression(fit_intercept = True)
    protected_regression.fit(x_unprotected_test, x_protected_test[:, protected_column])
    sensetive_directions.append(protected_regression.coef_.reshape((-1,)))
sensetive_directions = np.array(sensetive_directions)

# Replace the raw directions by an orthonormal basis of their span (one basis
# vector per row).
sensetive_directions = scipy.linalg.orth(sensetive_directions.T).T

# Rescale every row to unit length with a single division. Looping until the
# floating-point norm equals 1 exactly is not guaranteed to terminate, because
# rounding can leave the norm one ulp away from 1 forever.
row_norms = np.linalg.norm(sensetive_directions, axis = 1, keepdims = True)
sensetive_directions = sensetive_directions / row_norms

The variables are cast to the proper tensor objects.

In [5]:
# Features and sensitive directions become float32 tensors.
x_unprotected_train = tf.cast(x_unprotected_train, dtype = tf.float32)
x_unprotected_test = tf.cast(x_unprotected_test, dtype = tf.float32)
sensetive_directions = tf.cast(sensetive_directions, dtype = tf.float32)

# Labels are converted to int32 and then one-hot encoded with depth 2.
y_train = tf.one_hot(y_train.astype('int32'), 2)
y_test = tf.one_hot(y_test.astype('int32'), 2)

### Reload model

Let's load the weights and biases of the corresponding run of experiment. 

In [6]:
# The weight file is named after the data seed and the model seed of the run.
weights_path = f'./sensr/models/data_{seed_data}_{seed_model}.txt'
with open(weights_path, 'r') as f:
    weight = json.load(f)

# Convert every JSON entry into a numpy array.
weights = list(map(np.array, weight))

Now we build the graph using pre-trained weights

In [7]:
# Function for building a layer with given weight and bias
def SimpleDense(variable):
    """
    Build an affine layer from a fixed (weight, bias) pair.

    parameters:
        variable: pair (w, b); both entries are cast to float32 tensors

    return:
        callable mapping x to tf.matmul(x, w) + b
    """
    kernel, bias = variable
    kernel = tf.cast(kernel, dtype = tf.float32)
    bias = tf.cast(bias, dtype = tf.float32)

    def apply(x):
        return tf.matmul(x, kernel) + bias

    return apply

# We use prefitted weights and biases to build the graph
def graph(x):
    """
    Evaluate the pre-trained two-layer network on x.

    The first layer uses weights[0] and weights[1] followed by a ReLU, the
    second layer uses weights[2] and weights[3] followed by a softmax.

    parameters:
        x: input tensor that can be matrix-multiplied with weights[0]

    return:
        softmax output of the second layer
    """
    hidden_layer = SimpleDense((weights[0], weights[1]))
    output_layer = SimpleDense((weights[2], weights[3]))
    hidden = tf.nn.relu(hidden_layer(x))
    logits = output_layer(hidden)
    return tf.nn.softmax(logits)

### Gradient flow attack and hypothesis testing

We define the required function, which performs the gradient flow attack and returns the ratio of the perturbed loss to the original loss.

In [8]:
def sample_perturbation(data_point, regularizer = 100, learning_rate = 5e-2, num_steps = 200):
    """
    Runs a gradient flow attack on one sample and reports how much its loss grew

    Starting from the original x, the input is moved by gradient ascent on
        EntropyLoss(y, graph(x)) - regularizer * ||perturb||^2
    where perturb = utils.unprotected_direction(x - x_start, sensetive_directions)
    (presumably the part of the displacement outside the sensitive directions;
    verify against sensr.utils). Step k uses step size learning_rate / k**(2/3).

    parameters: 
        data_point: tuple of x, y
            x: tensor of shape (d, )
            y: one-hot encoded tensor of shape (2, )
        regularizer (float): regularizer constant for fair metric
        learning_rate (float): base step size for gradient ascent
        num_steps (int): number of steps in gradient ascent

    return:
        float; ratio of entropy losses for perturbed and original sample
    """
    features, label = data_point
    # The network works on batches, so both tensors get a leading axis of size 1.
    x_start = tf.reshape(features, (1, -1))
    y = tf.reshape(label, (1, -1))

    x = x_start
    for step in range(1, num_steps + 1):
        with tf.GradientTape() as tape:
            # x is a plain tensor, so it has to be watched explicitly.
            tape.watch(x)
            prob = graph(x)
            perturb = utils.unprotected_direction(x - x_start, sensetive_directions)
            loss = utils.EntropyLoss(y, prob) - regularizer * tf.norm(perturb)**2

        gradient = tape.gradient(loss, x)
        # Ascent step with a step size that decays like step**(-2/3).
        x = x + learning_rate * gradient / (step ** (2/3))

    perturbed_loss = utils.EntropyLoss(y, graph(x))
    original_loss = utils.EntropyLoss(y, graph(x_start))
    loss_ratio = perturbed_loss / original_loss

    return loss_ratio.numpy()

For demo purposes we perform the gradient flow attack only on the first 20 test points. Readers are welcome to run it on any test points of their choice. We create a zipped sequence of data points for the first 20 test points. 

In [9]:
# Half-open index range [start, end) of the test points to attack.
start = 0
end = 20
# Materialise the (x, y) pairs in a list. A bare zip object is a one-shot
# iterator: after the first pass it is exhausted, so re-running the attack
# would silently see no data points at all.
data_points = list(zip(x_unprotected_test[start:end], y_test[start:end]))

We now set up some experimental parameters and use them to build a partial function. 

In [10]:
# Hyper-parameters of the gradient flow attack.
regularizer = 50
learning_rate = 1e-2
num_steps = 200

# Bind the hyper-parameters so that the attack only needs a data point.
sample_perturb = partial(
    sample_perturbation,
    regularizer = regularizer,
    learning_rate = learning_rate,
    num_steps = num_steps,
)

# One loss ratio per attacked data point.
test_ratios = np.array([sample_perturb(data_point) for data_point in data_points])

Now we calculate the lower bound and p-value for the test.

In [11]:
# Keep only the finite loss ratios; inf/nan entries would corrupt mean and std.
test_ratios = test_ratios[np.isfinite(test_ratios)]
num_ratios = test_ratios.shape[0]
if num_ratios == 0:
    # Without this guard the statistics below are nan, and nan < 0.05 is False,
    # so the model would silently be reported as individually fair.
    raise ValueError('No finite loss ratios are available for the test.')

ratio_mean = np.mean(test_ratios)
ratio_std = np.std(test_ratios)

# One-sided lower confidence bound for the mean ratio; 1.645 is approximately
# the 95% quantile of the standard normal distribution.
lower_bound = ratio_mean - 1.645*ratio_std/np.sqrt(num_ratios)

# Standardised distance of the mean ratio from the threshold 1.25.
t = (ratio_mean - 1.25)/ratio_std
t *= np.sqrt(num_ratios)

# Upper-tail probability; norm.sf(t) equals 1 - norm.cdf(t) but does not lose
# precision through cancellation when the p-value is very small.
pval = norm.sf(t)
print(f'For the proposed test, lower bound is {lower_bound} and'
      f' p-value is {pval}.\n')

rejected = pval < 0.05
decision = 'rejected' if rejected else 'not rejected'
# The highlight is switched off again after the highlighted words so that it
# does not leak into whatever is printed next.
if rejected:
    print('The test is rejected at a level 0.05 and'
          ' we conclude the model is not \033[1;40;47mindividually fair.\033[1;40;0m')
else:
    print('The test is not rejected at a'
          ' level 0.05 and we conclude the model is \033[1;40;47mindividually fair.\033[1;40;0m')
    

For the proposed test, lower bound is 1.0109767625867034 and p-value is 1.0.

The test is not rejected at a level 0.05 and we conclude the model is [1;40;47mindividually fair.


### Some fairness measures

Now we calculate some fairness measures for the fitted model. First we get the predictions on the test data points. 

In [12]:
# Predicted class = index of the largest softmax probability in each row.
prob = graph(x_unprotected_test)
y_pred = tf.argmax(prob, axis = 1).numpy()

# Feature columns 39 and 40 are used as the gender and race attributes.
test_features = dataset_orig_test.features
gender, race = test_features[:, 39], test_features[:, 40]

# y_test is rebound to the flat label vector of the test set.
y_test = dataset_orig_test.labels.reshape((-1,))

Now we calculate several fairness measures for gender and race. 

In [13]:
# Each section pairs a highlighted header with the protected attribute whose
# group metrics are reported beneath it.
sections = (
    ('\033[1;40;47mMeasures for gender:\033[1;40;0m', gender),
    ('\n\033[1;40;47mMeasures for race:\033[1;40;0m', race),
)
for header, protected_attribute in sections:
    print(header)
    # The call is made with label_good=1; its return value is not used here.
    _ = metrics.group_metrics(y_test, y_pred, protected_attribute, label_good=1)

[1;40;47mMeasures for gender:[1;40;0m
Accuracy is 0.751797
Balanced accuracy is 0.750635
Gap RMS is 0.04686660860680571
Mean absolute gap is 0.04467711097540024
Max gap is 0.05883460924293629
Average odds difference is -0.014157
Equal opportunity difference is 0.030520
Statistical parity difference is -0.142075

[1;40;47mMeasures for race:[1;40;0m
Accuracy is 0.751797
Balanced accuracy is 0.750635
Gap RMS is 0.05802546350787034
Mean absolute gap is 0.05786025174847749
Max gap is 0.06223582975669539
Average odds difference is -0.057860
Equal opportunity difference is -0.062236
Statistical parity difference is -0.099100
