# Demo of individual fairness testing for SenSR and reduction using **COMPAS** data

We present a small scale demo for SenSR fitted on COMPAS data. We shall use SenSR weights fitted on COMPAS data. 

Let's install and load the required modules. 

In [1]:
#!pip install aif360
import numpy as np
import tensorflow as tf
from tensorflow import keras
from compas_data import get_compas_train_test
from sklearn import linear_model
import sensr.utils as utils
import scipy
import json
import warnings
warnings.filterwarnings("ignore")
from functools import partial
from scipy.stats import norm
import sensr.metrics as metrics

Let's load the seed for train-test split and initialization of model fitting.

In [2]:
# Row `run` of seeds.npy supplies the seed for the train-test split (column 0)
# and the seed for model initialisation (column 1).
seeds = np.load('seeds.npy')
run = 0
seed_data = seeds[run, 0]
seed_model = seeds[run, 1]

### Data pre-processing 

Let's load the data

In [3]:
# Train-test split of COMPAS driven by the data seed; the last returned value
# is not needed in this cell.
(x_train, x_test, y_train, y_test,
 y_sex_train, y_sex_test,
 y_race_train, y_race_test, _) = get_compas_train_test(random_state=seed_data)

# Independent copies of the first two feature columns and of the
# protected-attribute labels.
group_train = np.copy(x_train[:, :2])
group_test = np.copy(x_test[:, :2])
y_sex_train = np.copy(y_sex_train)
y_sex_test = np.copy(y_sex_test)
y_race_train = np.copy(y_race_train)
y_race_test = np.copy(y_race_test)

(5278, 8)
['sex', 'race', 'priors_count', 'age_cat=25 to 45', 'age_cat=Greater than 45', 'age_cat=Less than 25', 'c_charge_degree=F', 'c_charge_degree=M']
sex
race


We fit logistic regressions for gender and race on the other covariates to get the sensitive directions. We then extract an orthonormal basis from them. This basis will be used to project out the sensitive directions from the features.

In [4]:
def sensitive_dir(x, gender, race):
    """
    Builds the sensitive directions and a basis of the subspace they span

    Two logistic regressions are fitted on the columns of x from index 2
    onwards: one with gender as response and one with race as response.
    Each coefficient vector is padded with two leading zeros so that it has
    as many entries as x has columns. The coordinate vectors of the first
    two columns are appended as two further directions.

    parameters:
        x: 2-d array of features; its first two columns are left out of the regressions
        gender: response for the first logistic regression
        race: response for the second logistic regression

    return:
        tuple (sensetive_directions, sensetive_basis)
            sensetive_directions: array whose four rows are the gender direction,
                the race direction and the two coordinate vectors
            sensetive_basis: array whose rows come from scipy.linalg.orth applied
                to the directions, each row rescaled to unit Euclidean norm
    """
    n_features = x.shape[1]
    covariates = x[:, 2:]
    protected_regression = linear_model.LogisticRegression(fit_intercept = True)

    def fitted_direction(response):
        # Refit the shared estimator, then pad for the two columns that were left out
        protected_regression.fit(covariates, response)
        coefficients = protected_regression.coef_.reshape((-1,))
        return np.concatenate(([0, 0], coefficients), axis=0)

    gender_direction = fitted_direction(gender)
    race_direction = fitted_direction(race)

    first_coordinate = np.zeros((n_features,))
    second_coordinate = np.zeros((n_features,))
    first_coordinate[0] = 1
    second_coordinate[1] = 1

    sensetive_directions = np.array(
        [gender_direction, race_direction, first_coordinate, second_coordinate])

    # Extracting orthonormal basis for sensitive directions
    sensetive_basis = scipy.linalg.orth(sensetive_directions.T).T
    for row in range(sensetive_basis.shape[0]):
        sensetive_basis[row] = sensetive_basis[row] / np.linalg.norm(sensetive_basis[row])

    return sensetive_directions, sensetive_basis


# Only the basis is kept; note that it is computed from the test split.
_, sensetive_basis = sensitive_dir(x_test, y_sex_test, y_race_test)


The variables are cast to proper tensor objects.

In [5]:
# Features and basis become float32 tensors; labels become a 2-class one-hot tensor.
x_test = tf.cast(x_test, dtype=tf.float32)
sensetive_basis = tf.cast(sensetive_basis, dtype=tf.float32)
y_test = tf.one_hot(y_test.astype('int32'), 2)

## SenSR

Here we present the demo for SenSR model. We provide both the options of fitting the model or using pre-trained model (training SenSR takes a while). 

### Training SenSR

The SenSR code is written in TensorFlow 1, so we load the TensorFlow 1 compatible modules from TensorFlow 2.

In [None]:
# Skip this if you want to use pretrained model
import tensorflow.compat.v1 as tf
from sensr.train_clp_adult import train_fair_nn
from sklearn.preprocessing import OneHotEncoder
def run_sensr(seed_data, seed_model, save_model=False):
    """
    Trains SenSR on the COMPAS train split and returns the fitted weights

    parameters:
        seed_data: random state passed to get_compas_train_test for the train-test split
        seed_model: seed passed to train_fair_nn
        save_model: value forwarded to train_fair_nn as its `plot` argument.
            NOTE(review): the original cell referenced an undefined name
            `save_model` here; the default False is an assumption - confirm
            against train_fair_nn.

    return:
        the first value returned by train_fair_nn (the fitted weights)
    """
    # get_compas_train_test is imported directly at the top of the notebook;
    # there is no `compas` module object in scope.
    x_train, x_test, y_train, y_test, y_sex_train, _,\
        y_race_train, _, _ = get_compas_train_test(random_state = seed_data)
    group_train, group_test = np.copy(x_train[:, :2]), np.copy(x_test[:, :2])
    y_sex_train, y_race_train = np.copy(y_sex_train), np.copy(y_race_train)

    group_names = ['sex', 'race']

    # One-hot encode the labels with an encoder fitted on the train labels only.
    # Newer scikit-learn names the dense-output switch `sparse_output`; older
    # releases only know `sparse`, so fall back when the new keyword is rejected.
    try:
        one_hot = OneHotEncoder(sparse_output=False)
    except TypeError:
        one_hot = OneHotEncoder(sparse=False)
    one_hot.fit(y_train.reshape(-1,1))
    y_train = one_hot.transform(y_train.reshape(-1,1))
    y_test = one_hot.transform(y_test.reshape(-1,1))

    # Sensitive directions are estimated from the train split
    sensetive_directions, _ = sensitive_dir(x_train, y_sex_train, y_race_train)

    tf.reset_default_graph()
    fair_info = [group_train, group_test, group_names, sensetive_directions]
    weights, _, _, _, _ = train_fair_nn(x_train, y_train,\
         tf_prefix='sensr', adv_epoch_full=8,l2_attack=0.0001, adv_epoch=10, ro=0.001,\
         adv_step=0.1, plot=save_model, fair_info=fair_info, balance_batch=True, \
         X_test = x_test, X_test_counter=None, y_test = y_test, lamb_init=2., n_units=[100],\
         l2_reg=0, epoch=16000, batch_size=1000, lr=10e-5, lambda_clp=0.,\
         fair_start=0., counter_init=False, seed=seed_model)

    return weights
weights = run_sensr(seed_data, seed_model)

# Reloading tensorflow 2 modules 
import tensorflow as tf

### Reload pre-trained weights

Let's load the weights and biases of the corresponding run of experiment. 

In [6]:
# Pre-trained SenSR parameters for this (data seed, model seed) pair, read as JSON
sensr_weights_path = f'./sensr/models/data_{seed_data}_{seed_model}.txt'
with open(sensr_weights_path, 'r') as f:
    weight = json.load(f)
# One numpy array per stored entry
weights = list(map(np.array, weight))

### Building the graph 

Now we build the graph using pre-trained weights

In [7]:
# Function for building a layer with given weight and bias
def SimpleDense(variable):
    """
    Creates an affine layer from a fixed weight and bias

    parameters:
        variable: pair (w, b); both are cast to float32 tensors

    return:
        function mapping x to tf.matmul(x, w) + b
    """
    kernel, bias = variable
    kernel = tf.cast(kernel, dtype = tf.float32)
    bias = tf.cast(bias, dtype = tf.float32)

    def layer(x):
        return tf.matmul(x, kernel) + bias

    return layer

# We use prefitted weights and biases to build the graph
def graph_sensr(x):
    """
    Forward pass of the SenSR network built from the global `weights`

    The network is an affine layer (weights[0], weights[1]) followed by ReLU,
    a second affine layer (weights[2], weights[3]) and a softmax.

    parameters:
        x: input tensor fed to the first layer

    return:
        tensor of softmax probabilities
    """
    hidden_layer = SimpleDense([weights[0], weights[1]])
    output_layer = SimpleDense([weights[2], weights[3]])
    hidden = tf.nn.relu(hidden_layer(x))
    logits = output_layer(hidden)
    return tf.nn.softmax(logits)

### Gradient flow attack and hypothesis testing

We define the required function which performs gradient-flow-attack and returns ratio of perturbed loss and original loss

In [8]:
def sample_perturbation(data_point, regularizer = 100, learning_rate = 5e-2, num_steps = 200):
    """
    Runs the gradient flow attack on one sample of the SenSR model and
    returns the ratio between the perturbed loss and the original loss

    Starting from the sample itself, the input is moved by gradient ascent on
        EntropyLoss(y, graph_sensr(x)) - regularizer * ||u||^2,
    where u = utils.unprotected_direction(x - x_start, sensetive_basis)
    (presumably the part of the displacement outside the sensitive subspace;
    see sensr.utils).

    parameters:
        data_point: tuple of x, y
            x: tensor of shape (d, )
            y: one-hot encoded tensor of shape (2, )
        regularizer (float): regularizer constant for fair metric
        learning_rate (float): step size for gradient ascent
        num_steps (int): number of steps in gradient ascent

    return:
        float; ratio of entropy losses for perturbed and original sample
    """
    features, label = data_point
    origin = tf.reshape(features, (1, -1))
    label = tf.reshape(label, (1, -1))

    current = origin
    for _ in range(num_steps):
        with tf.GradientTape() as tape:
            # `current` is a plain tensor, so it has to be watched explicitly
            tape.watch(current)
            prediction = graph_sensr(current)
            displacement = utils.unprotected_direction(current - origin, sensetive_basis)
            objective = utils.EntropyLoss(label, prediction)  - regularizer  * tf.norm(displacement)**2

        # Ascent step on the penalised loss
        current = current + learning_rate * tape.gradient(objective, current)

    perturbed_loss = utils.EntropyLoss(label, graph_sensr(current))
    original_loss = utils.EntropyLoss(label, graph_sensr(origin))

    return (perturbed_loss / original_loss).numpy()

For demo purposes we perform the gradient flow attack only on the first 20 test points. Readers are welcome to run it on any test points of their choice. We create a zipped sequence of data points for the first 20 test points. 

In [9]:
start = 0
end = 20
# Materialise the (x, y) pairs: a bare zip object is a one-shot iterator, so
# re-running the attack cell without re-running this one would silently
# iterate over nothing.
data_points = list(zip(x_test[start:end], y_test[start:end]))

We now setup some experimental parameters and extract a partial function using them. 

In [10]:
# Attack hyper-parameters
regularizer = 100
learning_rate = 1e-2
num_steps = 200

# Freeze the hyper-parameters so the attack takes a single data point
sample_perturb = partial(sample_perturbation, regularizer=regularizer,
                         learning_rate=learning_rate, num_steps=num_steps)

# One loss ratio per data point
test_ratios = np.array([sample_perturb(point) for point in data_points])

Now we calculate the lower bound and p-value for the test.

In [11]:
# Drop ratios that are NaN or infinite before computing the statistics
test_ratios = test_ratios[np.isfinite(test_ratios)]

n_ratios = test_ratios.shape[0]
ratio_mean, ratio_std = np.mean(test_ratios), np.std(test_ratios)

# One-sided lower bound for the mean ratio (1.645 is the 0.95 standard normal quantile)
lower_bound = ratio_mean - 1.645*ratio_std/np.sqrt(n_ratios)

# z-statistic of the mean ratio against the threshold 1.25 and its upper-tail p-value
t = (ratio_mean-1.25)/ratio_std
t *= np.sqrt(n_ratios)
pval = 1- norm.cdf(t)
print(f'For the proposed test, lower bound is {lower_bound} and p-value is {pval}.\n')

decision = 'rejected' if pval < 0.05 else 'not rejected'
# The trailing \033[0m switches the highlight off again so it does not bleed
# into later output; the highlight covers the full conclusion, including "not".
if decision == 'rejected':
    print('The test is rejected at a level 0.05 and'
          ' we conclude the model is \033[1;40;47mnot individually fair.\033[0m')
else:
    print('The test is not rejected at a'
          ' level 0.05 and we conclude the model is \033[1;40;47mindividually fair.\033[0m')
    

For the proposed test, lower bound is 1.043219325729754 and p-value is 0.9999359875463081.

The test is not rejected at a level 0.05 and we conclude the model is [1;40;47mindividually fair.


### Some fairness measures

Now we calculate some fairness measures for the fitted model. First we get the predictions on the test data points. 

In [12]:
# Predicted class of the SenSR network for every test point
prob = graph_sensr(x_test)
y_pred = tf.argmax(prob, axis=1).numpy()

# True labels are read off the second column of the one-hot encoding
labels_test = y_test.numpy()[:, 1]

# Protected attributes of the test points
gender, race = y_sex_test, y_race_test

Now we calculate several fairness measures for gender and race. 

In [13]:
# Same metric report for each protected attribute, under a highlighted heading
for heading, protected in (('\033[1;40;47mMeasures for gender:\033[1;40;0m', gender),
                           ('\n\033[1;40;47mMeasures for race:\033[1;40;0m', race)):
    print(heading)
    _ = metrics.group_metrics(labels_test, y_pred, protected, label_good=1)

print('\n\n*Accuracy and Balanced accuracy are measuring performance irrespective of the protected attribute.')

[1;40;47mMeasures for gender:[1;40;0m
Accuracy is 0.613636
Balanced accuracy is 0.611854
Gap RMS is 0.012150305376121497
Mean absolute gap is 0.011789878620164812
Max gap is 0.014727340595201699
Average odds difference is -0.011790
Equal opportunity difference is -0.008852
Statistical parity difference is 0.024008

[1;40;47mMeasures for race:[1;40;0m
Accuracy is 0.613636
Balanced accuracy is 0.611854
Gap RMS is 0.2319540377864503
Mean absolute gap is 0.22779090159377147
Max gap is 0.2715399678299181
Average odds difference is 0.227791
Equal opportunity difference is 0.271540
Statistical parity difference is 0.240416


*Accuracy and Balanced accuracy are measuring performance irrespective of the protected attribute.


## Reduction

Here we present a testing demo for the reduction method. As before, we provide both options: training the model or using a pre-trained model. 

### Training reduction

In [14]:
from sklearn.linear_model import LogisticRegression
from fairlearn.reductions import ExponentiatedGradient
from fairlearn.reductions import DemographicParity, TruePositiveRateDifference, ErrorRateRatio, EqualizedOdds
from reduction.metrics import group_metrics
# Short names for the fairlearn constraint classes imported above
constraints = dict(
    TPRD=TruePositiveRateDifference,
    ERR=ErrorRateRatio,
    DP=DemographicParity,
    EO=EqualizedOdds,
)

In [15]:
# Skip this if you want to use pre-trained model
def extract_weights(run):
    """
    Fits the reduction classifier and collects the parameters of its ensemble

    An ExponentiatedGradient mitigator with an EqualizedOdds constraint is
    fitted around a logistic regression on the COMPAS train split, using
    y_sex_train + 2*y_race_train as the sensitive feature. Group metrics of
    the test predictions with respect to race are printed. For every
    ensemble member with positive weight, its coefficients, intercept and
    weight are collected.

    parameters:
        run (int): row of the global `seeds` array; column 0 gives the data seed

    return:
        dict with keys 'ens_weights', 'coefs' and 'intercepts', each a list
        with one entry per retained ensemble member
    """
    data_seed = seeds[run, 0]

    (x_train, x_test, y_train, y_test,
     y_sex_train, y_sex_test, y_race_train, y_race_test,
     feature_names) = get_compas_train_test(random_state = data_seed)

    # One group label per combination of sex and race
    group_train_cross = y_sex_train + 2*y_race_train
    group_test_cross = y_sex_test + 2*y_race_test

    #### Using 4 protected attributes ####
    ## Reduction classifier
    eps = 0.05
    mitigator = ExponentiatedGradient(
        LogisticRegression(solver='liblinear', fit_intercept=True),
        EqualizedOdds(), eps=eps, T=50)
    mitigator.fit(x_train, y_train, sensitive_features=group_train_cross)
    y_pred_mitigated = mitigator.predict(x_test)
    print('\nFair on all test')
    _ = group_metrics(y_test, y_pred_mitigated, y_race_test, label_protected=0, label_good=0)

    # Keep only the ensemble members that carry positive weight
    data = {'ens_weights': [], 'coefs': [], 'intercepts': []}
    for t, w_t in enumerate(mitigator._weights.index):
        member_weight = mitigator._weights[w_t]
        if member_weight > 0:
            predictor = mitigator._predictors[t]
            data['coefs'].append(predictor.coef_.flatten().tolist())
            data['intercepts'].append(predictor.intercept_[0].tolist())
            data['ens_weights'].append(member_weight.tolist())

    return data

data = extract_weights(run)

(5278, 8)
['sex', 'race', 'priors_count', 'age_cat=25 to 45', 'age_cat=Greater than 45', 'age_cat=Less than 25', 'c_charge_degree=F', 'c_charge_degree=M']
sex
race

Fair on all test
Accuracy is 0.650568
Balanced accuracy is 0.646626
Gap RMS is 0.061104992926792515
Mean absolute gap is 0.04767215443753367
Max gap is 0.08589761521784739
Average odds difference is -0.038225
Equal opportunity difference is 0.009447
Statistical parity difference is -0.057265


### Loading pretrained weights

In [16]:
# Pre-trained reduction ensemble for this data seed, read as JSON
reduction_weights_path = f'./reduction/models/data_{seed_data}.txt'
with open(reduction_weights_path, 'r') as f:
    data = json.load(f)

### Building the graph

We now build the graph with extracted weights, intercepts and coefficients 

In [17]:
coef, intercept, weight = data['coefs'], data['intercepts'], data['ens_weights']

# Every stored entry becomes a float32 tensor.
# NOTE: this rebinds the global `weights` that graph_sensr reads.
to_float32 = partial(tf.cast, dtype=tf.float32)
coefs = list(map(to_float32, coef))
intercepts = list(map(to_float32, intercept))
weights = list(map(to_float32, weight))

def graph_reduction(x):
    """
    Class probabilities of the reduction ensemble

    Each member is a logistic model given by one entry of the globals
    `coefs` and `intercepts`; the member probabilities are combined as a
    sum weighted by the matching entry of the global `weights`.

    parameters:
        x: 2-d float32 tensor with one sample per row

    return:
        tensor with two columns: 1 - p and p, where p is the weighted sum
        of the member probabilities
    """
    n, _ = x.shape
    prob = tf.zeros([n, 1], dtype = tf.float32)
    for coef, intercept, weight in zip(coefs, intercepts, weights):
        model_logit = x @ tf.reshape(coef, [-1, 1]) + intercept
        # tf.sigmoid is the same logistic function as exp(z) / (1 + exp(z)),
        # but it stays finite for large z, where exp(z) overflows float32 and
        # the quotient turns into NaN (which also poisons the attack gradient).
        prob += tf.sigmoid(model_logit) * weight

    return tf.concat([1-prob, prob], axis = 1)


### Gradient flow attack and hypothesis testing

We now perform the gradient flow attack on the reduction model. As before, we first define the function for the gradient flow attack. For demonstration purposes we perform the attack on the first 20 sample points.

In [18]:
def sample_perturbation(data_point, regularizer = 100, learning_rate = 5e-2, num_steps = 200):
    """
    Runs the gradient flow attack on one sample of the reduction model and
    returns the ratio between the perturbed loss and the original loss

    Starting from the sample itself, the input is moved by gradient ascent on
        EntropyLoss(y, graph_reduction(x)) - regularizer * ||u||^2,
    where u = utils.unprotected_direction(x - x_start, sensetive_basis)
    (presumably the part of the displacement outside the sensitive subspace;
    see sensr.utils).

    parameters:
        data_point: tuple of x, y
            x: tensor of shape (d, )
            y: one-hot encoded tensor of shape (2, )
        regularizer (float): regularizer constant for fair metric
        learning_rate (float): step size for gradient ascent
        num_steps (int): number of steps in gradient ascent

    return:
        float; ratio of entropy losses for perturbed and original sample
    """
    features, label = data_point
    origin = tf.reshape(features, (1, -1))
    label = tf.reshape(label, (1, -1))

    current = origin
    for _ in range(num_steps):
        with tf.GradientTape() as tape:
            # `current` is a plain tensor, so it has to be watched explicitly
            tape.watch(current)
            prediction = graph_reduction(current)
            displacement = utils.unprotected_direction(current - origin, sensetive_basis)
            objective = utils.EntropyLoss(label, prediction)  - regularizer  * tf.norm(displacement)**2

        # Ascent step on the penalised loss
        current = current + learning_rate * tape.gradient(objective, current)

    perturbed_loss = utils.EntropyLoss(label, graph_reduction(current))
    original_loss = utils.EntropyLoss(label, graph_reduction(origin))

    return (perturbed_loss / original_loss).numpy()

In [19]:
start = 0
end = 20
# Materialise the (x, y) pairs: a bare zip object is a one-shot iterator, so
# re-running the attack cell without re-running this one would silently
# iterate over nothing.
data_points = list(zip(x_test[start:end], y_test[start:end]))

We setup parameters for gradient flow attack

In [20]:
# Attack hyper-parameters
regularizer = 100
learning_rate = 1e-2
num_steps = 200

# Freeze the hyper-parameters so the attack takes a single data point
sample_perturb = partial(sample_perturbation, regularizer=regularizer,
                         learning_rate=learning_rate, num_steps=num_steps)

The gradient flow attack is performed on the selected sample points, and the lower bound, p-value and test decision are reported.

In [21]:
# One loss ratio per data point; NaN or infinite ratios are dropped
test_ratio = np.array([sample_perturb(point) for point in data_points])
test_ratio = test_ratio[np.isfinite(test_ratio)]

n_ratios = test_ratio.shape[0]
ratio_mean, ratio_std = np.mean(test_ratio), np.std(test_ratio)

# One-sided lower bound for the mean ratio (1.645 is the 0.95 standard normal quantile)
lower_bound = ratio_mean - 1.645*ratio_std/np.sqrt(n_ratios)

# z-statistic of the mean ratio against the threshold 1.25 and its upper-tail p-value
t = (ratio_mean-1.25)/ratio_std
t *= np.sqrt(n_ratios)
pval = 1- norm.cdf(t)
print(f'For the proposed test, lower bound is {lower_bound} and p-value is {pval}.\n')

decision = 'rejected' if pval < 0.05 else 'not rejected'
# The trailing \033[0m switches the highlight off again so it does not bleed
# into later output.
if decision == 'rejected':
    print('The test is rejected at a level 0.05 and'
          ' we conclude the model is \033[1;40;47mnot individually fair.\033[0m')
else:
    print('The test is not rejected at a'
          ' level 0.05 and we conclude the model \033[1;40;47mis individually fair.\033[0m')
    

For the proposed test, lower bound is 2.6969997419298033 and p-value is 0.0.

The test is rejected at a level 0.05 and we conclude the model is [1;40;47mnot individually fair.


### Some fairness measures

Now we calculate some fairness measures for the fitted model. First we get the predictions on the test data points. 

In [22]:
# Predicted class of the reduction ensemble for every test point
prob = graph_reduction(x_test)
y_pred = tf.argmax(prob, axis=1).numpy()

# True labels are read off the second column of the one-hot encoding
labels_test = y_test.numpy()[:, 1]

# Protected attributes of the test points
gender, race = y_sex_test, y_race_test

# Same metric report for each protected attribute, under a highlighted heading
for heading, protected in (('\033[1;40;47mMeasures for gender:\033[1;40;0m', gender),
                           ('\n\033[1;40;47mMeasures for race:\033[1;40;0m', race)):
    print(heading)
    _ = metrics.group_metrics(labels_test, y_pred, protected, label_good=1)
print('\n\n*Accuracy and Balanced accuracy are measuring performance irrespective of the protected attribute.')

[1;40;47mMeasures for gender:[1;40;0m
Accuracy is 0.657197
Balanced accuracy is 0.652998
Gap RMS is 0.0705225979867558
Mean absolute gap is 0.07047441423368517
Max gap is 0.07308089945722407
Average odds difference is -0.070474
Equal opportunity difference is -0.073081
Statistical parity difference is -0.018902

[1;40;47mMeasures for race:[1;40;0m
Accuracy is 0.657197
Balanced accuracy is 0.652998
Gap RMS is 0.06616066577313631
Mean absolute gap is 0.058869817533930974
Max gap is 0.08906217217987278
Average odds difference is 0.030192
Equal opportunity difference is 0.089062
Statistical parity difference is 0.049304


*Accuracy and Balanced accuracy are measuring performance irrespective of the protected attribute.
