# Testing file 
### where we evaluate BEUTEL's models using the test set

## Preliminaries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.data import Dataset
from tensorflow.keras.optimizers import Adam, Adagrad


from util.load_data import load_data
from util.evaluation import *
from models.beutel.models import *
from models.beutel.learning import train_loop as beutel_train

In [2]:
batch_size = 32
epochs = 100
learning_rate = 0.01

In [3]:
FAIR_COEFFS = [1.]

In [4]:
cv_seeds = [13, 29, 42, 55, 73]

## Load data

In [5]:
data_name = 'german'

In [6]:
x, y, a = load_data(data_name)
raw_data = (x, y, a)

In [7]:
xdim = x.shape[1]
ydim = y.shape[1]
adim = a.shape[1]
zdim = 8

## Result file

In [8]:
header = "model_name", "cv_seed","fair_coeff", "clas_acc", "dp", "deqodds", "deqopp", "trade_dp", "trade_deqodds", "trade_deqopp", "TN_a0", "FP_a0", "FN_a0", "TP_a0", "TN_a1", "FP_a1", "FN_a1", "TP_a1"
results = []

## Testing loop
#### Each model is evalueted 5 times
#### In the end of each iteration we save the result

### BEUTEL for DP

In [9]:
fairdef = 'DemPar'

for cv_seed in cv_seeds:
    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed)

    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train))
    train_data = train_data.batch(batch_size, drop_remainder=True)

    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
    test_data = test_data.batch(batch_size, drop_remainder=True)

    for FAIR_COEFF in FAIR_COEFFS:

        opt = Adam(learning_rate=learning_rate)
    
        model = Beutel(xdim, ydim, adim, zdim, FAIR_COEFF, fairdef)

        ret = beutel_train(model, raw_data, train_data, epochs, opt)

        Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
        clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(Y, A, Y_hat, A_hat, adim)

        fair_metrics = (dp, deqodds, deqopp)
        tradeoff = []
        for fair_metric in fair_metrics:
            tradeoff.append(compute_tradeoff(clas_acc, fair_metric))

        result = ['BEUTEL4DP', cv_seed, FAIR_COEFF, clas_acc, dp, deqodds, deqopp, tradeoff[0], tradeoff[1], tradeoff[2]] + metrics_a0 + metrics_a1

        results.append(result)

        del(opt)

> Epoch | Model Loss | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.6253901124000549 | 0.5316413044929504 | 0.7191388607025146 | 0.6622023809523809 | 0.6800595238095238
> 2 | 0.6217646598815918 | 0.5315810441970825 | 0.7119483351707458 | 0.6785714285714286 | 0.6964285714285714
> 3 | 0.6186708807945251 | 0.5318962335586548 | 0.7054455280303955 | 0.6785714285714286 | 0.6964285714285714
> 4 | 0.6161513328552246 | 0.5324298143386841 | 0.6998728513717651 | 0.6785714285714286 | 0.6964285714285714
> 5 | 0.6141088008880615 | 0.5330852270126343 | 0.6951323747634888 | 0.6785714285714286 | 0.6964285714285714
> 6 | 0.6124470233917236 | 0.5337932109832764 | 0.6911007761955261 | 0.6785714285714286 | 0.6964285714285714
> 7 | 0.6110860109329224 | 0.534504771232605 | 0.6876673102378845 | 0.6785714285714286 | 0.6964285714285714
> 8 | 0.6099621057510376 | 0.5351870059967041 | 0.6847372651100159 | 0.6785714285714286 | 0.6964285714285714
> 9 | 0.6090251207351685 | 0.5358191728591919 | 0.682231009006

### BEUTEL for Eq Opp

In [10]:
# fairdef = 'EqOpp'

# for FAIR_COEFF in FAIR_COEFFS:
#     for i in range(test_loop):

#         opt = Adam(learning_rate=learning_rate)

#         model = Beutel(xdim, ydim, adim, zdim, hidden_layer_specs, fairdef)

#         ret = beutel_train(model, raw_data, train_data, epochs, opt)

#         Y, A, Y_hat, A_hat = fair_evaluation(model, valid_data)
#         clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(Y, A, Y_hat, A_hat, adim)

#         fair_metrics = (dp, deqodds, deqopp)
#         tradeoff = []
#         for fair_metric in fair_metrics:
#             tradeoff.append(compute_tradeoff(clas_acc, fair_metric))

#         result = ['BEUTEL4EqOpp', FAIR_COEFF, clas_acc, dp, deqodds, deqopp, tradeoff[0], tradeoff[1], tradeoff[2]] + metrics_a0 + metrics_a1

#         # results.append(result)

## Saving into DF then CSV

In [11]:
result_df = pd.DataFrame(results, columns=header)
result_df

Unnamed: 0,model_name,cv_seed,fair_coeff,clas_acc,dp,deqodds,deqopp,trade_dp,trade_deqodds,trade_deqopp,TN_a0,FP_a0,FN_a0,TP_a0,TN_a1,FP_a1,FN_a1,TP_a1
0,BEUTEL4DP,13,1.0,0.711806,0.999013,0.991106,0.996286,0.831301,0.828551,0.830356,1.0,28.0,2.0,63.0,1.0,48.0,5.0,140.0
1,BEUTEL4DP,29,1.0,0.673611,1.0,1.0,1.0,0.804979,0.804979,0.804979,0.0,38.0,0.0,50.0,0.0,56.0,0.0,144.0
2,BEUTEL4DP,42,1.0,0.701389,1.0,1.0,1.0,0.82449,0.82449,0.82449,0.0,28.0,0.0,57.0,0.0,58.0,0.0,145.0
3,BEUTEL4DP,55,1.0,0.697917,1.0,1.0,1.0,0.822086,0.822086,0.822086,0.0,29.0,0.0,62.0,0.0,58.0,0.0,139.0
4,BEUTEL4DP,73,1.0,0.708333,1.0,1.0,1.0,0.829268,0.829268,0.829268,0.0,34.0,0.0,55.0,0.0,50.0,0.0,149.0


In [12]:
result_df.to_csv(f'{data_name}-result/beutel-{epochs}.csv')