In [1]:
from math import sqrt, isnan
from pathlib import Path

import tensorflow as tf
from tensorflow.keras.optimizers import Adam, Adagrad
from tensorflow.data import Dataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import csv

from util import metrics
from util.load_data import load_data
from util.evaluation import compute_tradeoff

from madras_laftr.models import UnfairMLP

In [2]:
batch_size = 64
epochs = 200
lr = 0.001
opt = Adam(learning_rate=lr)

In [3]:
header = "model_name", "clas_acc", "dp", "deqodds", "deqopp", "trade_dp", "trade_deqodds", "trade_deqopp", "TN_a0", "FP_a0", "FN_a0", "TP_a0", "TN_a1", "FP_a1", "FN_a1", "TP_a1"
results = []

test_loop = 5

## Load Data

In [4]:
x_train, y_train, a_train = load_data('adult', 'train')
raw_data = (x_train, y_train, a_train)

In [5]:
xdim = x_train.shape[1]
ydim = y_train.shape[1]
adim = a_train.shape[1]
zdim = 8

In [6]:
train_data = Dataset.from_tensor_slices((x_train, y_train, a_train))
train_data = train_data.batch(batch_size, drop_remainder=True)
train_data

<BatchDataset shapes: ((64, 113), (64, 1), (64, 1)), types: (tf.float64, tf.float64, tf.float64)>

In [7]:
x_valid, y_valid, a_valid = load_data('adult', 'valid')

valid_data = Dataset.from_tensor_slices((x_valid, y_valid, a_valid))
valid_data = valid_data.batch(batch_size, drop_remainder=True)

In [8]:
x_test, y_test, a_test = load_data('adult', 'test')

test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
test_data = test_data.batch(batch_size, drop_remainder=True)

## Train Lopp

In [9]:
def train(unfair_clas, X, Y, optimizer):
    
    with tf.GradientTape() as tape0:
        
        unfair_clas(X, Y, training=True) #to compute the foward
        current_loss = unfair_clas.loss #current loss
    
    grads = tape0.gradient(current_loss, unfair_clas.variables)
    
    optimizer.apply_gradients(zip(grads, unfair_clas.variables))

In [10]:
def train_loop(unfair_clas, train_dataset, epochs, optmizer):
    
    print("> Epoch | Class Loss | Class Acc")

    x_train, y_train, a_train = raw_data
    l = y_train.shape[0]
    l = (l//batch_size) * batch_size

    for epoch in range(epochs):
        Y_hat = None
        X_hat = None
        batch_count = 1
        
        for X, Y, A in train_dataset:
            
            train(model, X, Y, optmizer)

            if batch_count == 1:
                Y_hat = model.Y_hat
                batch_count += 1
                
            else:
                Y_hat = tf.concat([Y_hat, model.Y_hat], 0)

        clas_loss = tf.reduce_mean(model.loss)
        clas_acc = metrics.accuracy(y_train[:l], tf.math.round(Y_hat))

        print("> {} | {} | {}".format(
            epoch+1,
            clas_loss,
            clas_acc))

## Evaluation

In [11]:

def evaluation(model, valid_data):
    Y_hat = None
    batch_count = 1
    
    for X, Y, A in valid_data:
        
        model(X, Y, A)
        
        if batch_count == 1:
            Y_hat = model.Y_hat
            batch_count += 1
        else:
            Y_hat = tf.concat([Y_hat, model.Y_hat], 0)
    
    return Y_hat

In [12]:
def compute_metrics(Y, Y_hat, A):
    Y_hat = tf.math.round(Y_hat)
    
    clas_acc = metrics.accuracy(Y, Y_hat)

    print("> Class Acc")
    print("> {}".format(clas_acc))

    dp = metrics.DP(Y_hat.numpy(), A)
    deqodds = metrics.DEqOdds(Y, Y_hat.numpy(), A)
    deqopp = metrics.DEqOpp(Y, Y_hat.numpy(), A)

    print("> DP | DI | DEOPP")
    print("> {} | {} | {}".format(dp, deqodds, deqopp))

    tp = metrics.TP(Y, Y_hat.numpy())
    tn = metrics.TN(Y, Y_hat.numpy())
    fp = metrics.FP(Y, Y_hat.numpy())
    fn = metrics.FN(Y, Y_hat.numpy())

    print('> Confusion Matrix \n' +
                'TN: {} | FP: {} \n'.format(tn, fp) +
                'FN: {} | TP: {}'.format(fn, tp))

    confusion_matrix = np.array([[tn, fp],
                                [fn, tp]])

    m = [metrics.TN, metrics.FP, metrics.FN, metrics.TP]
    metrics_a0 = [0, 0, 0, 0]
    metrics_a1 = [0, 0, 0, 0]
    for i in range(len(m)):
        metrics_a0[i] = metrics.subgroup(m[i], A, Y, Y_hat.numpy())
        metrics_a1[i] = metrics.subgroup(m[i], 1 - A, Y, Y_hat.numpy())

    print('> Confusion Matrix for A = 0 \n' +
            'TN: {} | FP: {} \n'.format(metrics_a0[0], metrics_a0[1]) +
            'FN: {} | TP: {}'.format(metrics_a0[2], metrics_a0[3]))

    print('> Confusion Matrix for A = 1 \n' +
            'TN: {} | FP: {} \n'.format(metrics_a1[0], metrics_a1[1]) +
            'FN: {} | TP: {}'.format(metrics_a1[2], metrics_a1[3]))

    confusion_matrix = np.array([[tn, fp],
                                [fn, tp]])

    return clas_acc, confusion_matrix, dp, deqodds, deqopp, metrics_a0, metrics_a1

## Testing

In [13]:
for i in range(test_loop):
    opt = Adam(learning_rate=lr)

    model = UnfairMLP(xdim, zdim, ydim)
    ret = train_loop(model, train_data, epochs, opt)
    Y_hat = evaluation(model, valid_data)
    
    clas_acc, confusion_matrix, dp, deqodds, deqopp, metrics_a0, metrics_a1  = compute_metrics(y_valid, Y_hat, a_valid)
    
    fair_metrics = (dp, deqodds, deqopp)
    
    tradeoff = []
    
    for fair_metric in fair_metrics:
        tradeoff.append(compute_tradeoff(clas_acc, fair_metric))
    
    result = ['UnfairNN', clas_acc, dp, deqodds, deqopp, tradeoff[0], tradeoff[1], tradeoff[2]] + metrics_a0 + metrics_a1

    results.append(result)

> Epoch | Class Loss | Class Acc
> 1 | 0.3882751166820526 | 0.7657078912466844
> 2 | 0.3911265730857849 | 0.8118783156498673
> 3 | 0.39347174763679504 | 0.8273789787798409
> 4 | 0.3386576473712921 | 0.8274204244031831
> 5 | 0.347187876701355 | 0.830487400530504
> 6 | 0.33378928899765015 | 0.8265086206896551
> 7 | 0.365893691778183 | 0.8252238063660478
> 8 | 0.3630523085594177 | 0.8309433023872679
> 9 | 0.3241446018218994 | 0.8299900530503979
> 10 | 0.42222434282302856 | 0.8318136604774535
> 11 | 0.3776119351387024 | 0.8231929708222812
> 12 | 0.3099239468574524 | 0.831191976127321
> 13 | 0.33585864305496216 | 0.82907824933687
> 14 | 0.3018547296524048 | 0.8343832891246684
> 15 | 0.37438905239105225 | 0.8275033156498673
> 16 | 0.34225133061408997 | 0.8276690981432361
> 17 | 0.30041420459747314 | 0.8353365384615384
> 18 | 0.36504611372947693 | 0.8284565649867374
> 19 | 0.33269864320755005 | 0.8310261936339522
> 20 | 0.30380529165267944 | 0.8338444960212201
> 21 | 0.30134326219558716 | 0.8

## Saving into DF then CSV

In [14]:
result_df = pd.DataFrame(results, columns=header)
result_df

Unnamed: 0,model_name,clas_acc,dp,deqodds,deqopp,trade_dp,trade_deqodds,trade_deqopp,TN_a0,FP_a0,FN_a0,TP_a0,TN_a1,FP_a1,FN_a1,TP_a1
0,UnfairNN,0.829787,0.786031,0.914253,0.939538,0.807317,0.869975,0.881259,1565.0,102.0,72.0,140.0,2360.0,491.0,359.0,927.0
1,UnfairNN,0.829787,0.786031,0.914253,0.939538,0.807317,0.869975,0.881259,1565.0,102.0,72.0,140.0,2360.0,491.0,359.0,927.0
2,UnfairNN,0.829787,0.786031,0.914253,0.939538,0.807317,0.869975,0.881259,1565.0,102.0,72.0,140.0,2360.0,491.0,359.0,927.0
3,UnfairNN,0.829787,0.786031,0.914253,0.939538,0.807317,0.869975,0.881259,1565.0,102.0,72.0,140.0,2360.0,491.0,359.0,927.0
4,UnfairNN,0.829787,0.786031,0.914253,0.939538,0.807317,0.869975,0.881259,1565.0,102.0,72.0,140.0,2360.0,491.0,359.0,927.0


In [15]:
result_df.to_csv('results/validation_unfair_nn-200.csv')