# Testing file 
### where we evaluate BEUTEL's models using the test set

## Preliminaries

In [1]:
from pathlib import Path

import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.data import Dataset
from tensorflow.keras.optimizers import Adam, Adagrad


from util.load_data import load_data
from util.evaluation import *
from models.beutel.models import *
from models.beutel.learning import train_loop as beutel_train

In [2]:
# Training hyperparameters shared by every run in this notebook.
batch_size, epochs = 32, 100
learning_rate = 1e-2  # step size handed to the Adam optimizer

In [3]:
# Fairness coefficients to sweep; each value is passed to the Beutel constructor.
FAIR_COEFFS = [1.0]

In [4]:
# One train/test split (and one training run) is performed per seed.
cv_seeds = [
    13,
    29,
    42,
    55,
    73,
]

## Load data

In [5]:
# Dataset identifier: given to load_data and reused in the results directory name.
data_name = "adult"

In [6]:
# Load the three arrays once; keep them both bundled (raw_data) and unpacked.
raw_data = tuple(load_data(data_name))
x, y, a = raw_data

In [7]:
# Size of the second axis of each loaded array; forwarded to the Beutel constructor.
xdim, ydim, adim = (arr.shape[1] for arr in (x, y, a))
# Fourth constructor argument of Beutel — presumably the latent size; TODO confirm.
zdim = 8

## Result file

In [8]:
# Column names of the results table, in the order each result row is built.
header = (
    "model_name", "cv_seed", "fair_coeff",
    "clas_acc", "dp", "deqodds", "deqopp",
    "trade_dp", "trade_deqodds", "trade_deqopp",
    "TN_a0", "FP_a0", "FN_a0", "TP_a0",
    "TN_a1", "FP_a1", "FN_a1", "TP_a1",
)
# One row is appended per (seed, fairness coefficient) run.
results = list()

## Testing loop
#### Each model is evaluated 5 times
#### At the end of each iteration we save the result

### BEUTEL for DP

In [9]:
# Train and evaluate one Beutel model per (cv_seed, FAIR_COEFF) pair using the
# 'DemPar' fairness definition, appending one row per run to `results`.
fairdef = 'DemPar'

for cv_seed in cv_seeds:
    # 70/30 split, reproducible through cv_seed.
    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed)

    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train))
    train_data = train_data.batch(batch_size, drop_remainder=True)

    # NOTE(review): drop_remainder=True also discards up to batch_size - 1 test
    # samples from the evaluation. Left unchanged because fair_evaluation may
    # rely on fixed-size batches — TODO confirm and evaluate on the full set.
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
    test_data = test_data.batch(batch_size, drop_remainder=True)

    for FAIR_COEFF in FAIR_COEFFS:
        # Previously only the data split was seeded, so TensorFlow-level
        # randomness (e.g. weight initialisation) differed on every execution
        # and the saved metrics could not be regenerated. Seeding here ties
        # each run to its cv_seed and gives every coefficient the same start.
        tf.random.set_seed(cv_seed)

        opt = Adam(learning_rate=learning_rate)
        model = Beutel(xdim, ydim, adim, zdim, FAIR_COEFF, fairdef)

        # The return value of the training loop is not used in this notebook.
        beutel_train(model, raw_data, train_data, epochs, opt)

        Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
        clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(Y, A, Y_hat, A_hat, adim)

        # One accuracy/fairness trade-off score per fairness metric,
        # in the same order as the trade_* columns of `header`.
        fair_metrics = (dp, deqodds, deqopp)
        tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

        result = ['BEUTEL4DP', cv_seed, FAIR_COEFF, clas_acc, dp, deqodds, deqopp, tradeoff[0], tradeoff[1], tradeoff[2]] + metrics_a0 + metrics_a1

        results.append(result)

> Epoch | Model Loss | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.7048986554145813 | 0.7214328050613403 | 0.6883646249771118 | 0.49064711830131447 | 0.8546195652173914
> 2 | 0.6708391308784485 | 0.6342356204986572 | 0.7074426412582397 | 0.6901857937310415 | 0.376453488372093
> 3 | 0.6708784103393555 | 0.6341137290000916 | 0.7076429724693298 | 0.7339800303336703 | 0.3375884732052578
> 4 | 0.6708954572677612 | 0.6340923309326172 | 0.7076985239982605 | 0.7352439332659252 | 0.3372724974721941
> 5 | 0.6709012985229492 | 0.6340850591659546 | 0.7077175378799438 | 0.7354967138523761 | 0.33701971688574317
> 6 | 0.6709034442901611 | 0.6340824365615845 | 0.7077245116233826 | 0.7355283114256825 | 0.33698811931243683
> 7 | 0.6709042191505432 | 0.6340814828872681 | 0.7077270150184631 | 0.7356231041456016 | 0.33683013144590496
> 8 | 0.6709045171737671 | 0.6340811252593994 | 0.7077279686927795 | 0.7356231041456016 | 0.33683013144590496
> 9 | 0.6709046363830566 | 0.6340809464454651 | 0.7077282

### BEUTEL for Eq Opp

In [10]:
# NOTE(review): this cell is disabled and out of date with the rest of the
# notebook. It references `test_loop`, `hidden_layer_specs` and `valid_data`,
# none of which are defined in the cells shown here; it passes
# `hidden_layer_specs` where the DemPar cell passes the fairness coefficient;
# and its result row has no `cv_seed` entry, so it would not match `header`.
# Bring it in line with the DemPar loop before re-enabling it.
# fairdef = 'EqOpp'

# for FAIR_COEFF in FAIR_COEFFS:
#     for i in range(test_loop):

#         opt = Adam(learning_rate=learning_rate)

#         model = Beutel(xdim, ydim, adim, zdim, hidden_layer_specs, fairdef)

#         ret = beutel_train(model, raw_data, train_data, epochs, opt)

#         Y, A, Y_hat, A_hat = fair_evaluation(model, valid_data)
#         clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(Y, A, Y_hat, A_hat, adim)

#         fair_metrics = (dp, deqodds, deqopp)
#         tradeoff = []
#         for fair_metric in fair_metrics:
#             tradeoff.append(compute_tradeoff(clas_acc, fair_metric))

#         result = ['BEUTEL4EqOpp', FAIR_COEFF, clas_acc, dp, deqodds, deqopp, tradeoff[0], tradeoff[1], tradeoff[2]] + metrics_a0 + metrics_a1

#         # results.append(result)

## Saving into DF then CSV

In [11]:
# Gather every collected row into a single table whose columns follow `header`.
result_df = pd.DataFrame(data=results, columns=header)
result_df

Unnamed: 0,model_name,cv_seed,fair_coeff,clas_acc,dp,deqodds,deqopp,trade_dp,trade_deqodds,trade_deqopp,TN_a0,FP_a0,FN_a0,TP_a0,TN_a1,FP_a1,FN_a1,TP_a1
0,BEUTEL4DP,13,1.0,0.738106,0.998231,0.991943,0.987373,0.848683,0.846403,0.844735,3772.0,109.0,488.0,23.0,6104.0,199.0,2749.0,92.0
1,BEUTEL4DP,29,1.0,0.420878,0.445642,0.446588,0.445488,0.432906,0.433352,0.432833,2400.0,1486.0,309.0,197.0,411.0,5884.0,160.0,2689.0
2,BEUTEL4DP,42,1.0,0.720228,0.999353,0.987806,0.978557,0.837136,0.833058,0.829751,3697.0,235.0,460.0,40.0,5844.0,391.0,2701.0,168.0
3,BEUTEL4DP,55,1.0,0.732934,0.998953,0.996213,0.994626,0.845513,0.84453,0.84396,3740.0,115.0,482.0,18.0,6074.0,201.0,2817.0,89.0
4,BEUTEL4DP,73,1.0,0.687131,0.993696,0.994319,0.995962,0.812456,0.812664,0.813212,3416.0,463.0,415.0,63.0,5446.0,790.0,2567.0,376.0


In [12]:
# Write the results table to '<data_name>-result/beutel-<epochs>.csv'.
# to_csv does not create missing directories, so make sure the target folder
# exists first; otherwise a fresh checkout fails here after the whole
# training loop has already run.
result_path = Path(f'{data_name}-result') / f'beutel-{epochs}.csv'
result_path.parent.mkdir(parents=True, exist_ok=True)
result_df.to_csv(result_path)