# Testing file 
### where we evaluate BEUTEL's models using the test set

## Preliminaries

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.data import Dataset
from tensorflow.keras.optimizers import Adam, Adagrad

from util.load_data import load_data
from util.evaluation import *
from models.beutel.models import *
from models.beutel.learning import train_loop as beutel_train

In [2]:
# Training hyper-parameters shared by the cells below.
batch_size = 32         # mini-batch size given to Dataset.batch
epochs = 100            # forwarded to the training loop; also part of the CSV file name
learning_rate = 1e-2    # step size handed to the Adam optimizer

In [3]:
# Fairness coefficients to sweep; each value is passed to the Beutel constructor.
FAIR_COEFFS = [1.0]

In [None]:
# One train/test split is drawn per seed (used as random_state in train_test_split).
cv_seeds = [
    13,
    29,
    42,
    55,
    73,
]

## Load data

In [None]:
# Dataset identifier: passed to load_data and used as the prefix of the result folder.
data_name = "adult-race"

In [None]:
# Unpack the three arrays returned by load_data
# (presumably features, labels and sensitive attribute - confirm in util.load_data).
x, y, a = load_data(data_name)

# The same three arrays bundled as a tuple; handed to the training loop as-is.
raw_data = x, y, a

In [None]:
# Size of the second axis of each loaded array; these are passed to the Beutel constructor.
xdim, ydim, adim = (array.shape[1] for array in (x, y, a))

# Fourth constructor argument of Beutel (presumably the latent representation size - confirm).
zdim = 8

## Result file

In [11]:
# Column names for the result DataFrame built at the end of the notebook.
header = (
    "model_name", "cv_seed", "fair_coeff",
    "clas_acc", "dp", "deqodds", "deqopp",
    "trade_dp", "trade_deqodds", "trade_deqopp",
    "TN_a0", "FP_a0", "FN_a0", "TP_a0",
    "TN_a1", "FP_a1", "FN_a1", "TP_a1",
)

# The testing loop appends one list per evaluated run to this accumulator.
results = []

## Testing loop
#### Each model is evaluated 5 times
#### At the end of each iteration we save the result

### BEUTEL for DP

In [12]:
# Train and evaluate the Beutel model with the demographic-parity objective:
# one run per (cv_seed, FAIR_COEFF) pair, each appending one row to `results`.
fairdef = 'DemPar'

for cv_seed in cv_seeds:
    # Fresh 70/30 split for this seed; cv_seed is used only as the split's random_state here.
    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed)

    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train))
    train_data = train_data.batch(batch_size, drop_remainder=True)

    # NOTE(review): drop_remainder=True also discards the last partial batch of the
    # test split, so metrics are computed on slightly fewer samples than x_test holds.
    # Left unchanged in case the model/evaluation needs a fixed batch size - confirm.
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
    test_data = test_data.batch(batch_size, drop_remainder=True)

    for FAIR_COEFF in FAIR_COEFFS:

        # A new optimizer per run so no optimizer state carries over between models.
        opt = Adam(learning_rate=learning_rate)

        model = Beutel(xdim, ydim, adim, zdim, FAIR_COEFF, fairdef)

        ret = beutel_train(model, raw_data, train_data, epochs, opt)

        Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
        clas_acc, dp, deqodds, deqopp, confusion_matrix = compute_metrics(Y, A, Y_hat, A_hat, adim)

        # Accuracy/fairness trade-off for each of the three fairness metrics.
        tradeoff = [compute_tradeoff(clas_acc, fair_metric)
                    for fair_metric in (dp, deqodds, deqopp)]

        # The header declares eight confusion-matrix columns (TN/FP/FN/TP for a0 and a1),
        # so the counts must be part of the row; previously they were computed and dropped.
        # Assumes confusion_matrix flattens to those counts in header order -
        # TODO confirm against util.evaluation.compute_metrics.
        confusion_counts = np.asarray(confusion_matrix).ravel().tolist()

        result = ['BEUTEL4DP', cv_seed, FAIR_COEFF, clas_acc, dp, deqodds, deqopp,
                  tradeoff[0], tradeoff[1], tradeoff[2]] + confusion_counts

        # Fail here with a clear message rather than later, when the DataFrame
        # is built from rows that do not match the declared columns.
        if len(result) != len(header):
            raise ValueError(
                f"result row has {len(result)} values but header declares "
                f"{len(header)} columns")

        results.append(result)

        del opt

> Epoch | Model Loss | Class Loss | Adv Loss | Class Acc | Adv Acc


2022-06-13 19:45:03.379099: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2022-06-13 19:45:03.563742: W tensorflow/python/util/util.cc:329] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


> 1 | 0.7020904421806335 | 0.6811363697052002 | 0.7230445146560669 | 0.726085875331565 | 0.34963527851458887
> 2 | 0.6890798211097717 | 0.6643774509429932 | 0.7137821912765503 | 0.7487151856763926 | 0.3324767904509284
> 3 | 0.6754788160324097 | 0.6468339562416077 | 0.7041236162185669 | 0.7511190318302388 | 0.3376160477453581
> 4 | 0.6607792377471924 | 0.6280919313430786 | 0.6934664845466614 | 0.7572115384615384 | 0.37421253315649866
> 5 | 0.6446148157119751 | 0.607866108417511 | 0.6813634634017944 | 0.768691976127321 | 0.5169927055702918
> 6 | 0.6267290115356445 | 0.586036205291748 | 0.6674218773841858 | 0.7442390583554377 | 0.6666114058355438
> 7 | 0.6070429086685181 | 0.56280118227005 | 0.6512846946716309 | 0.725381299734748 | 0.7135692970822282
> 8 | 0.5858393907546997 | 0.538948118686676 | 0.6327306032180786 | 0.7189572281167109 | 0.7277022546419099
> 9 | 0.5640205144882202 | 0.5160726308822632 | 0.611968457698822 | 0.71684350132626 | 0.7332145225464191
> 10 | 0.5431709289550781 | 

### BEUTEL for Eq Opp

In [13]:
# NOTE(review): this whole cell is disabled (commented out) and would not run as-is.
# It refers to `test_loop`, `hidden_layer_specs` and `valid_data`, none of which are
# defined in the visible cells, and it unpacks seven values from compute_metrics
# whereas the DP cell above unpacks five. Its result row also has no cv_seed entry.
# Update it to match the DP cell before re-enabling.

# fairdef = 'EqOpp'

# for FAIR_COEFF in FAIR_COEFFS:
#     for i in range(test_loop):

#         opt = Adam(learning_rate=learning_rate)

#         model = Beutel(xdim, ydim, adim, zdim, hidden_layer_specs, fairdef)

#         ret = beutel_train(model, raw_data, train_data, epochs, opt)

#         Y, A, Y_hat, A_hat = fair_evaluation(model, valid_data)
#         clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(Y, A, Y_hat, A_hat, adim)

#         fair_metrics = (dp, deqodds, deqopp)
#         tradeoff = []
#         for fair_metric in fair_metrics:
#             tradeoff.append(compute_tradeoff(clas_acc, fair_metric))

#         result = ['BEUTEL4EqOpp', FAIR_COEFF, clas_acc, dp, deqodds, deqopp, tradeoff[0], tradeoff[1], tradeoff[2]] + metrics_a0 + metrics_a1

#         # results.append(result)

## Saving into DF then CSV

In [14]:
# Build the result table: one row per entry of `results`, labelled with the `header` columns.
result_df = pd.DataFrame(
    data=results,
    columns=header,
)
result_df

In [15]:
# to_csv does not create missing parent directories, so make sure the
# dataset-specific result folder exists before writing; otherwise the
# export fails only after all training has already been done.
output_dir = Path(f'{data_name}-result')
output_dir.mkdir(parents=True, exist_ok=True)

# Same destination as before: <data_name>-result/beutel-<epochs>.csv
result_df.to_csv(output_dir / f'beutel-{epochs}.csv')