# Testing file 
### Evaluating Beutel's models on the held-out test set

## Preliminaries

In [1]:
from pathlib import Path

import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.data import Dataset
from tensorflow.keras.optimizers import Adam, Adagrad

from util.load_data import load_data
from util.evaluation import *
from models.beutel.models import *
from models.beutel.learning import train_loop as beutel_train

In [2]:
# Training hyperparameters shared by every run in this notebook.
batch_size = 32          # batch size used for both the train and test tf.data pipelines
epochs = 100             # forwarded to the training loop; also part of the result CSV name
learning_rate = 1e-2     # step size handed to the Adam optimizer

In [3]:
# Fairness coefficients to sweep; each value is passed to the Beutel constructor
# and recorded in the `fair_coeff` result column.
FAIR_COEFFS = [1.0]

In [4]:
# One train/test split is drawn per seed (used as `random_state` in train_test_split).
cv_seeds = [13, 29, 42, 55, 73]

## Load data

In [5]:
# Dataset identifier: passed to load_data and used as the prefix of the results directory.
data_name = 'titanic'

In [6]:
# Load the dataset once; keep the (x, y, a) triple bundled for the training loop
# and also unpacked for splitting.
raw_data = tuple(load_data(data_name))
x, y, a = raw_data

In [7]:
# Widths of x, y and a, read from the second axis of each array.
xdim, ydim, adim = (arr.shape[1] for arr in (x, y, a))
# Passed to Beutel as its fourth positional argument; presumably the size of the
# learned representation -- TODO confirm against models.beutel.models.
zdim = 8

## Result file

In [8]:
# Column names of the result table, in the order each result row is assembled.
header = (
    "model_name", "cv_seed", "fair_coeff",
    "clas_acc",
    "dp", "deqodds", "deqopp",
    "trade_dp", "trade_deqodds", "trade_deqopp",
    # Filled from metrics_a0 / metrics_a1, which are appended to the end of each row.
    "TN_a0", "FP_a0", "FN_a0", "TP_a0",
    "TN_a1", "FP_a1", "FN_a1", "TP_a1",
)
# Accumulates one row per (model, cv_seed, fair_coeff) run.
results = []

## Testing loop
#### Each model is evaluated 5 times (once per CV seed)
#### At the end of each iteration we save the result

### BEUTEL for DP

In [9]:
# Train and evaluate the Beutel model under the demographic-parity objective,
# once per CV seed and fairness coefficient, appending one row per run to `results`.
fairdef = 'DemPar'

for cv_seed in cv_seeds:
    # x, y and a are split in a single call so their rows stay aligned.
    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed)

    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train))
    train_data = train_data.batch(batch_size, drop_remainder=True)

    # NOTE(review): drop_remainder=True also discards the last partial batch of the
    # test split, so metrics are computed on fewer samples than were held out.
    # Kept as-is because the model may rely on fixed-size batches -- confirm.
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
    test_data = test_data.batch(batch_size, drop_remainder=True)

    for FAIR_COEFF in FAIR_COEFFS:
        # Seed TensorFlow before the model is built so that training is
        # reproducible for a given cv_seed, not only the data split.
        tf.random.set_seed(cv_seed)

        opt = Adam(learning_rate=learning_rate)
        model = Beutel(xdim, ydim, adim, zdim, FAIR_COEFF, fairdef)

        # NOTE(review): raw_data holds the full dataset, test rows included;
        # verify how train_loop uses it. Its return value is not needed here.
        beutel_train(model, raw_data, train_data, epochs, opt)

        Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
        clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(Y, A, Y_hat, A_hat, adim)

        # One accuracy/fairness tradeoff score per fairness metric, in header order.
        fair_metrics = (dp, deqodds, deqopp)
        tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

        result = ['BEUTEL4DP', cv_seed, FAIR_COEFF, clas_acc, dp, deqodds, deqopp, tradeoff[0], tradeoff[1], tradeoff[2]] + metrics_a0 + metrics_a1

        results.append(result)

> Epoch | Model Loss | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.7102263569831848 | 0.6909472942352295 | 0.7295054197311401 | 0.49506578947368424 | 0.5
> 2 | 0.6612179279327393 | 0.6458163261413574 | 0.6766194701194763 | 0.4407894736842105 | 0.6233552631578947
> 3 | 0.6898583769798279 | 0.701541543006897 | 0.6781753301620483 | 0.5016447368421053 | 0.6118421052631579
> 4 | 0.6602238416671753 | 0.6735606789588928 | 0.6468870639801025 | 0.40460526315789475 | 0.6595394736842105
> 5 | 0.6551238298416138 | 0.6453511118888855 | 0.6648964881896973 | 0.47039473684210525 | 0.6825657894736842
> 6 | 0.6663339734077454 | 0.6609464287757874 | 0.6717214584350586 | 0.5230263157894737 | 0.6200657894736842
> 7 | 0.675331711769104 | 0.6444687843322754 | 0.7061946392059326 | 0.4342105263157895 | 0.7680921052631579
> 8 | 0.6501919627189636 | 0.6754915714263916 | 0.6248923540115356 | 0.3815789473684211 | 0.8207236842105263
> 9 | 0.6491979360580444 | 0.6547811031341553 | 0.643614649772644 | 0.46546

### BEUTEL for Eq Opp

In [10]:
# DISABLED: equal-opportunity variant of the testing loop, kept for reference only.
# NOTE(review): this block is stale and will not run if simply uncommented:
#   - `test_loop`, `hidden_layer_specs` and `valid_data` are not defined in the
#     visible part of this notebook;
#   - the result row omits `cv_seed`, which `header` expects;
#   - Beutel receives `hidden_layer_specs` where the DP cell passes FAIR_COEFF.
# Port it to the cv_seed / test_data structure of the DP cell before re-enabling.

# fairdef = 'EqOpp'

# for FAIR_COEFF in FAIR_COEFFS:
#     for i in range(test_loop):

#         opt = Adam(learning_rate=learning_rate)

#         model = Beutel(xdim, ydim, adim, zdim, hidden_layer_specs, fairdef)

#         ret = beutel_train(model, raw_data, train_data, epochs, opt)

#         Y, A, Y_hat, A_hat = fair_evaluation(model, valid_data)
#         clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(Y, A, Y_hat, A_hat, adim)

#         fair_metrics = (dp, deqodds, deqopp)
#         tradeoff = []
#         for fair_metric in fair_metrics:
#             tradeoff.append(compute_tradeoff(clas_acc, fair_metric))

#         result = ['BEUTEL4EqOpp', FAIR_COEFF, clas_acc, dp, deqodds, deqopp, tradeoff[0], tradeoff[1], tradeoff[2]] + metrics_a0 + metrics_a1

#         # results.append(result)

## Saving into DF then CSV

In [11]:
# Assemble all collected rows into one table, labelled with the header columns,
# and display it as the cell output.
result_df = pd.DataFrame(data=results, columns=list(header))
result_df

Unnamed: 0,model_name,cv_seed,fair_coeff,clas_acc,dp,deqodds,deqopp,trade_dp,trade_deqodds,trade_deqopp,TN_a0,FP_a0,FN_a0,TP_a0,TN_a1,FP_a1,FN_a1,TP_a1
0,BEUTEL4DP,13,1.0,0.425781,0.623643,0.499611,0.580952,0.506059,0.459751,0.491408,13.0,3.0,40.0,35.0,30.0,100.0,4.0,31.0
1,BEUTEL4DP,29,1.0,0.519531,0.906977,0.916523,0.845799,0.660638,0.663154,0.643682,12.0,13.0,23.0,38.0,68.0,70.0,17.0,15.0
2,BEUTEL4DP,42,1.0,0.4375,0.606125,0.52227,0.634366,0.50819,0.476141,0.517854,25.0,3.0,43.0,28.0,40.0,92.0,6.0,19.0
3,BEUTEL4DP,55,1.0,0.496094,0.982281,0.965626,0.984848,0.659242,0.655448,0.659819,9.0,11.0,32.0,32.0,70.0,69.0,17.0,16.0
4,BEUTEL4DP,73,1.0,0.480469,0.987013,0.927077,0.915541,0.646317,0.632919,0.630209,11.0,13.0,24.0,40.0,52.0,79.0,17.0,20.0


In [12]:
# Persist the results. The output directory is created first so that a missing
# folder does not make the save fail after the whole training run has finished.
result_path = Path(f'{data_name}-result/beutel-{epochs}.csv')
result_path.parent.mkdir(parents=True, exist_ok=True)
result_df.to_csv(result_path)