# Testing file 
### where we evaluate LAFTR's models using the test set

## Preliminaries

In [1]:
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.data import Dataset
from tensorflow.keras.optimizers import Adam

from util.load_data import load_data
from util.evaluation import *
from models.madras_laftr.models import *
from models.madras_laftr.learning import train_loop as laftr_train

In [2]:
# Optimisation settings shared by every training run in this notebook.
batch_size = 64         # rows per mini-batch when the datasets are batched
epochs = 100            # epoch count handed to the training loop
learning_rate = 1e-3    # learning rate given to each Adam optimizer

In [3]:
# Loss weights passed to the model constructors (clas_coeff / fair_coeff /
# recon_coeff keyword arguments). Every value in FAIR_COEFFS gets its own run.
CLAS_COEFF = 1.0
FAIR_COEFFS = [0.2, 0.5, 0.7, 1.0]
RECON_COEFF = 0.0

# Layer-size lists keyed by sub-network name, passed as-is to the constructors.
hidden_layer_specs = dict(clas=[8], enc=[8], dec=[8], adv=[8])

In [4]:
# One train/test split is drawn per seed (used as random_state for the split).
cv_seeds = [
    13,
    29,
    42,
    55,
    73,
]

## Load data

In [5]:
# Dataset identifier: passed to load_data and reused in the output file path.
data_name = "titanic"

In [6]:
# Load the three arrays once; keep them both bundled (raw_data) and as
# individual names (x, y, a) because later cells use both forms.
raw_data = tuple(load_data(data_name))
x, y, a = raw_data

In [7]:
# Second-axis size of each loaded array; these are passed to the model constructors.
xdim, ydim, adim = (arr.shape[1] for arr in (x, y, a))

# Fourth positional argument of the model constructors
# (presumably the latent representation size — TODO confirm).
zdim = 8

## Result file

In [8]:
# Column names of the results table; each row appended to `results`
# follows this exact order.
header = (
    # run identification
    "model_name", "cv_seed", "fair_coeff",
    # accuracy and fairness metrics
    "clas_acc", "dp", "deqodds", "deqopp",
    # accuracy/fairness trade-off, one per fairness metric
    "trade_dp", "trade_deqodds", "trade_deqopp",
    # per-group confusion-matrix counts
    "TN_a0", "FP_a0", "FN_a0", "TP_a0",
    "TN_a1", "FP_a1", "FN_a1", "TP_a1",
)

# Filled by the testing loops, one list per (model, seed, fair_coeff) run.
results = list()

## Testing loop
#### Each model is evaluated 5 times (once per cross-validation seed)
#### At the end of each iteration we append the result to `results`

### LAFTR for DP

In [9]:
# LAFTR, demographic-parity variant: one train/test split per CV seed and,
# for each split, one newly built model per fairness coefficient.
for cv_seed in cv_seeds:
    # 70/30 split; the seed is used as random_state for the split.
    (x_train, x_test,
     y_train, y_test,
     a_train, a_test) = train_test_split(x, y, a, test_size=0.3, random_state=cv_seed)

    # NOTE(review): drop_remainder=True is applied to the test set as well, so
    # a trailing partial batch of test rows never reaches the metrics —
    # confirm this is intended.
    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train)).batch(
        batch_size, drop_remainder=True)
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test)).batch(
        batch_size, drop_remainder=True)

    for FAIR_COEFF in FAIR_COEFFS:
        # A new optimizer instance is created for every model.
        opt = Adam(learning_rate=learning_rate)
        model = DemParGan(
            xdim, ydim, adim, zdim, hidden_layer_specs,
            recon_coeff=RECON_COEFF,
            clas_coeff=CLAS_COEFF,
            fair_coeff=FAIR_COEFF,
        )
        # NOTE(review): raw_data is the full, unsplit (x, y, a) — verify that
        # train_loop does not fit on the held-out rows.
        ret = laftr_train(model, raw_data, train_data, epochs, opt)

        # Evaluate on the held-out batches.
        Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
        (clas_acc, dp, deqodds, deqopp,
         confusion_matrix, metrics_a0, metrics_a1) = compute_metrics(Y, A, Y_hat, A_hat, adim)

        # One trade-off value per fairness metric, in the same order as `header`.
        fair_metrics = (dp, deqodds, deqopp)
        tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

        result = ['LAFTR4DP', cv_seed, FAIR_COEFF, clas_acc, dp, deqodds, deqopp, *tradeoff] + metrics_a0 + metrics_a1
        results.append(result)

        del opt

> Epoch | Model Loss | Class Loss | Adv Loss | Dec Loss | Class Acc | Adv Acc | Dec Acc
> 1 | 0.2430640310049057 | 0.9161823987960815 | 0.934951901435852 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444 | 0.019097222222222224
> 2 | 0.23414766788482666 | 0.882422149181366 | 0.899895429611206 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444 | 0.019097222222222224
> 3 | 0.22241675853729248 | 0.8376826643943787 | 0.8521621227264404 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444 | 0.019097222222222224
> 4 | 0.20829729735851288 | 0.7829469442367554 | 0.7902752161026001 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444 | 0.019097222222222224
> 5 | 0.19506102800369263 | 0.7303628921508789 | 0.7258991599082947 | 104.61543273925781 | 0.3541666666666667 | 0.3472222222222222 | 0.019097222222222224
> 6 | 0.1856505423784256 | 0.6935237646102905 | 0.6828606128692627 | 104.61543273925781 | 0.4756944444444444 | 0.4913194444444444 | 0.019097222222

### LAFTR for Eq Odds

In [10]:
# LAFTR, equalized-odds variant: one train/test split per CV seed and,
# for each split, one newly built model per fairness coefficient.
for cv_seed in cv_seeds:
    # 70/30 split; the seed is used as random_state for the split.
    (x_train, x_test,
     y_train, y_test,
     a_train, a_test) = train_test_split(x, y, a, test_size=0.3, random_state=cv_seed)

    # NOTE(review): drop_remainder=True is applied to the test set as well, so
    # a trailing partial batch of test rows never reaches the metrics —
    # confirm this is intended.
    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train)).batch(
        batch_size, drop_remainder=True)
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test)).batch(
        batch_size, drop_remainder=True)

    for FAIR_COEFF in FAIR_COEFFS:
        # A new optimizer instance is created for every model.
        opt = Adam(learning_rate=learning_rate)
        model = EqOddsUnweightedGan(
            xdim, ydim, adim, zdim, hidden_layer_specs,
            recon_coeff=RECON_COEFF,
            clas_coeff=CLAS_COEFF,
            fair_coeff=FAIR_COEFF,
        )
        # NOTE(review): raw_data is the full, unsplit (x, y, a) — verify that
        # train_loop does not fit on the held-out rows.
        ret = laftr_train(model, raw_data, train_data, epochs, opt)

        # Evaluate on the held-out batches.
        Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
        (clas_acc, dp, deqodds, deqopp,
         confusion_matrix, metrics_a0, metrics_a1) = compute_metrics(Y, A, Y_hat, A_hat, adim)

        # One trade-off value per fairness metric, in the same order as `header`.
        fair_metrics = (dp, deqodds, deqopp)
        tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

        result = ['LAFTR4EqOdds', cv_seed, FAIR_COEFF, clas_acc, dp, deqodds, deqopp, *tradeoff] + metrics_a0 + metrics_a1
        results.append(result)

        del opt

> Epoch | Model Loss | Class Loss | Adv Loss | Dec Loss | Class Acc | Adv Acc | Dec Acc
> 1 | -0.006242476869374514 | 0.9161982536315918 | 0.9349256753921509 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444 | 0.019097222222222224
> 2 | -0.005762217100709677 | 0.8830562829971313 | 0.9003428816795349 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444 | 0.019097222222222224
> 3 | -0.004906819202005863 | 0.8421057462692261 | 0.8568261861801147 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444 | 0.019097222222222224
> 4 | -0.0038203364238142967 | 0.7979626655578613 | 0.8094236850738525 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444 | 0.019097222222222224
> 5 | -0.003226596163585782 | 0.7577776908874512 | 0.7674574851989746 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444 | 0.019097222222222224
> 6 | -0.00491370027884841 | 0.725875735282898 | 0.7406168580055237 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444 

### LAFTR for Eq Opp

In [11]:
# LAFTR, equal-opportunity variant: one train/test split per CV seed and,
# for each split, one newly built model per fairness coefficient.
for cv_seed in cv_seeds:
    # 70/30 split; the seed is used as random_state for the split.
    (x_train, x_test,
     y_train, y_test,
     a_train, a_test) = train_test_split(x, y, a, test_size=0.3, random_state=cv_seed)

    # NOTE(review): drop_remainder=True is applied to the test set as well, so
    # a trailing partial batch of test rows never reaches the metrics —
    # confirm this is intended.
    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train)).batch(
        batch_size, drop_remainder=True)
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test)).batch(
        batch_size, drop_remainder=True)

    for FAIR_COEFF in FAIR_COEFFS:
        # A new optimizer instance is created for every model.
        opt = Adam(learning_rate=learning_rate)
        model = EqOppUnweightedGan(
            xdim, ydim, adim, zdim, hidden_layer_specs,
            recon_coeff=RECON_COEFF,
            clas_coeff=CLAS_COEFF,
            fair_coeff=FAIR_COEFF,
        )
        # NOTE(review): raw_data is the full, unsplit (x, y, a) — verify that
        # train_loop does not fit on the held-out rows.
        ret = laftr_train(model, raw_data, train_data, epochs, opt)

        # Evaluate on the held-out batches.
        Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
        (clas_acc, dp, deqodds, deqopp,
         confusion_matrix, metrics_a0, metrics_a1) = compute_metrics(Y, A, Y_hat, A_hat, adim)

        # One trade-off value per fairness metric, in the same order as `header`.
        fair_metrics = (dp, deqodds, deqopp)
        tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

        result = ['LAFTR4EqOpp', cv_seed, FAIR_COEFF, clas_acc, dp, deqodds, deqopp, *tradeoff] + metrics_a0 + metrics_a1
        results.append(result)

        del opt

> Epoch | Model Loss | Class Loss | Adv Loss | Dec Loss | Class Acc | Adv Acc | Dec Acc
> 1 | -0.007292155176401138 | 0.9155250787734985 | 0.9349708557128906 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444 | 0.019097222222222224
> 2 | -0.0071904342621564865 | 0.881475567817688 | 0.9006500244140625 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444 | 0.019097222222222224
> 3 | -0.006775451824069023 | 0.8391532897949219 | 0.8572211265563965 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444 | 0.019097222222222224
> 4 | -0.006045681424438953 | 0.7918446063995361 | 0.8079664707183838 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444 | 0.019097222222222224
> 5 | -0.005136490799486637 | 0.7438696622848511 | 0.7575669288635254 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444 | 0.019097222222222224
> 6 | -0.004357393365353346 | 0.7001094818115234 | 0.7117291688919067 | 104.61543273925781 | 0.3680555555555556 | 0.3506944444444444

## Saving into DF then CSV

In [12]:
# Turn the accumulated result rows into a table, labelling columns with `header`.
result_df = pd.DataFrame(
    data=results,
    columns=header,
)
# Bare expression so the notebook renders the table as the cell output.
result_df

Unnamed: 0,model_name,cv_seed,fair_coeff,clas_acc,dp,deqodds,deqopp,trade_dp,trade_deqodds,trade_deqopp,TN_a0,FP_a0,FN_a0,TP_a0,TN_a1,FP_a1,FN_a1,TP_a1
0,LAFTR4DP,13,0.2,0.738281,0.519081,0.665499,0.67619,0.609574,0.700003,0.705873,9.0,7.0,25.0,50.0,118.0,12.0,23.0,12.0
1,LAFTR4DP,13,0.5,0.726562,0.683916,0.883191,0.79619,0.704595,0.797257,0.759785,15.0,1.0,34.0,41.0,118.0,12.0,23.0,12.0
2,LAFTR4DP,13,0.7,0.726562,0.696037,0.893631,0.824762,0.710972,0.801483,0.772554,15.0,1.0,34.0,41.0,117.0,13.0,22.0,13.0
3,LAFTR4DP,13,1.0,0.710938,0.70323,0.911419,0.824762,0.707063,0.798791,0.763631,14.0,2.0,34.0,41.0,114.0,16.0,22.0,13.0
4,LAFTR4DP,29,0.2,0.789062,0.537209,0.753578,0.696721,0.639223,0.770912,0.740022,17.0,8.0,12.0,49.0,120.0,18.0,16.0,16.0
5,LAFTR4DP,29,0.5,0.761719,0.659644,0.872876,0.826332,0.707016,0.813518,0.792711,19.0,6.0,18.0,43.0,116.0,22.0,15.0,17.0
6,LAFTR4DP,29,0.7,0.734375,0.746785,0.934628,0.909836,0.740528,0.822488,0.812743,20.0,5.0,25.0,36.0,116.0,22.0,16.0,16.0
7,LAFTR4DP,29,1.0,0.742188,0.752804,0.961305,0.955943,0.747458,0.837654,0.835612,20.0,5.0,24.0,37.0,115.0,23.0,14.0,18.0
8,LAFTR4DP,42,0.2,0.730469,0.60413,0.767232,0.803944,0.66132,0.748399,0.765447,16.0,12.0,23.0,48.0,111.0,21.0,13.0,12.0
9,LAFTR4DP,42,0.5,0.703125,0.736988,0.90525,0.942535,0.719658,0.791487,0.805415,19.0,9.0,30.0,41.0,107.0,25.0,12.0,13.0


In [13]:
# Persist the results table. pandas does not create missing parent
# directories, so make sure the output folder exists first; otherwise the
# write would fail only after every training run has already completed.
output_path = f'{data_name}-result/laftr-{epochs}.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
result_df.to_csv(output_path)