# Testing file 
### where we evaluate Zhang's models using the test set

## Preliminaries

In [1]:
from pathlib import Path

import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tensorflow.data import Dataset

# Project-local imports are kept in their original order: the wildcard import
# can rebind names, so moving it could change which definition wins.
from util.load_data import load_data
from util.evaluation import *
from models.zhang.models import FairLogisticRegression
from models.zhang.learning import train_loop as zhang_train

In [2]:
# Training hyperparameters shared by every run in this notebook.
lr = 0.001       # learning rate handed to the Adam optimizer
epochs = 100     # epoch count passed to the training loop (also part of the CSV file name)
batch_size = 64  # batch size of the train/test datasets and of the model constructor

In [3]:
# One seed per evaluation round; each one is used as the random_state of a train/test split.
cv_seeds = [
    13,
    29,
    42,
    55,
    73,
]

## Load data

In [4]:
# Dataset identifier: passed to load_data and used as the prefix of the results directory.
data_name = "german"

In [5]:
# Load the dataset selected by `data_name`; the loader's result is unpacked into three objects.
# NOTE(review): presumably x = features, y = labels, a = sensitive attribute -- confirm in util.load_data.
x, y, a = load_data(data_name)
# Keep the complete, unsplit triple together; it is handed to the training loop in later cells.
raw_data = (x, y, a)

In [6]:
# Size of the second axis of each loaded array; these are passed to the model constructor.
xdim, ydim, adim = (arr.shape[1] for arr in (x, y, a))
# Not referenced by any cell shown in this notebook.
zdim = 8

## Result file

In [7]:
# Column names of the results table, in the same order in which each result row is assembled.
header = (
    "model_name", "cv_seed",
    "clas_acc", "dp", "deqodds", "deqopp",
    "trade_dp", "trade_deqodds", "trade_deqopp",
    "TN_a0", "FP_a0", "FN_a0", "TP_a0",
    "TN_a1", "FP_a1", "FN_a1", "TP_a1",
)
# One row is appended here per (model, seed) run.
results = list()

## Testing loop
#### Each model is evaluated 5 times
#### At the end of each iteration we save the result

### Zhang for DP

In [8]:
# Train and evaluate FairLogisticRegression with fairdef 'DemPar' (the "Zhang for DP"
# configuration) once per seed in `cv_seeds`, appending one row per run to `results`.
fairdef = 'DemPar'

for cv_seed in cv_seeds:
    # Seed TensorFlow's global generator too, so a run is repeatable for a given
    # cv_seed and not just the train/test split.
    tf.random.set_seed(cv_seed)

    # 70/30 split of the three aligned arrays, controlled by the current seed.
    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed)

    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train))
    train_data = train_data.batch(batch_size, drop_remainder=True)

    # NOTE(review): drop_remainder=True also discards the final partial batch of the
    # *test* split, so the metrics below do not cover every held-out sample. It is
    # kept because the model is constructed with a fixed batch_size -- confirm
    # whether evaluation can accept a smaller last batch.
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
    test_data = test_data.batch(batch_size, drop_remainder=True)

    # Fresh optimizer and model for every seed.
    opt = Adam(learning_rate=lr)
    model = FairLogisticRegression(xdim, ydim, adim, batch_size, fairdef)
    # NOTE(review): raw_data is the unsplit dataset (test rows included) -- verify
    # how train_loop uses it.
    zhang_train(model, raw_data, train_data, epochs, opt)

    Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
    # confusion_matrix is returned by compute_metrics but not used in this cell.
    clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(Y, A, Y_hat, A_hat, adim)

    # One accuracy/fairness trade-off value per fairness metric, in the same order.
    fair_metrics = (dp, deqodds, deqopp)
    tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

    # Row layout follows `header`; metrics_a0 / metrics_a1 are concatenated as the
    # per-group TN/FP/FN/TP columns.
    result = ['Zhang4DP', cv_seed, clas_acc, *fair_metrics, *tradeoff] + metrics_a0 + metrics_a1

    results.append(result)

    # Drop the reference to this run's optimizer before the next iteration.
    del opt

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.5918705463409424 | 1.0379714965820312 | 0.6734375 | 0.2984375
> 2 | 0.5884881615638733 | 1.027639389038086 | 0.6734375 | 0.2984375
> 3 | 0.5849838256835938 | 1.017638921737671 | 0.6734375 | 0.2984375
> 4 | 0.5815937519073486 | 1.0079704523086548 | 0.6734375 | 0.2984375
> 5 | 0.578369140625 | 0.998611330986023 | 0.6734375 | 0.2984375
> 6 | 0.5752931237220764 | 0.9895269870758057 | 0.6734375 | 0.2984375
> 7 | 0.5723352432250977 | 0.9806817770004272 | 0.6734375 | 0.2984375
> 8 | 0.5694695711135864 | 0.9720444679260254 | 0.6734375 | 0.2984375
> 9 | 0.5666785836219788 | 0.9635899662971497 | 0.6734375 | 0.2984375
> 10 | 0.563951849937439 | 0.9552992582321167 | 0.6734375 | 0.2984375
> 11 | 0.561284065246582 | 0.9471579790115356 | 0.6734375 | 0.2984375
> 12 | 0.5586731433868408 | 0.9391556978225708 | 0.6734375 | 0.2984375
> 13 | 0.5561193227767944 | 0.9312848448753357 | 0.6734375 | 0.2984375
> 14 | 0.5536235570907593 | 0.92354029417

### Zhang for Eq Odds

In [9]:
# Train and evaluate FairLogisticRegression with fairdef 'EqOdds' (the "Zhang for Eq Odds"
# configuration) once per seed in `cv_seeds`, appending one row per run to `results`.
fairdef = 'EqOdds'

for cv_seed in cv_seeds:
    # Seed TensorFlow's global generator too, so a run is repeatable for a given
    # cv_seed and not just the train/test split.
    tf.random.set_seed(cv_seed)

    # 70/30 split of the three aligned arrays, controlled by the current seed.
    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed)

    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train))
    train_data = train_data.batch(batch_size, drop_remainder=True)

    # NOTE(review): drop_remainder=True also discards the final partial batch of the
    # *test* split, so the metrics below do not cover every held-out sample. It is
    # kept because the model is constructed with a fixed batch_size -- confirm
    # whether evaluation can accept a smaller last batch.
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
    test_data = test_data.batch(batch_size, drop_remainder=True)

    # Fresh optimizer and model for every seed.
    opt = Adam(learning_rate=lr)
    model = FairLogisticRegression(xdim, ydim, adim, batch_size, fairdef)
    # NOTE(review): raw_data is the unsplit dataset (test rows included) -- verify
    # how train_loop uses it.
    zhang_train(model, raw_data, train_data, epochs, opt)

    Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
    # confusion_matrix is returned by compute_metrics but not used in this cell.
    clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(Y, A, Y_hat, A_hat, adim)

    # One accuracy/fairness trade-off value per fairness metric, in the same order.
    fair_metrics = (dp, deqodds, deqopp)
    tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

    # Row layout follows `header`; metrics_a0 / metrics_a1 are concatenated as the
    # per-group TN/FP/FN/TP columns.
    result = ['Zhang4EqOdds', cv_seed, clas_acc, *fair_metrics, *tradeoff] + metrics_a0 + metrics_a1

    results.append(result)

    # Drop the reference to this run's optimizer before the next iteration.
    del opt

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.5918697118759155 | 1.0342029333114624 | 0.6734375 | 0.2984375
> 2 | 0.5884822010993958 | 1.0194097757339478 | 0.6734375 | 0.2984375
> 3 | 0.5849649906158447 | 1.0046213865280151 | 0.6734375 | 0.2984375
> 4 | 0.5815534591674805 | 0.9901756048202515 | 0.6734375 | 0.2984375
> 5 | 0.5782995820045471 | 0.9761829376220703 | 0.6734375 | 0.2984375
> 6 | 0.5751881003379822 | 0.9626451730728149 | 0.6734375 | 0.2984375
> 7 | 0.5721908807754517 | 0.9495275020599365 | 0.6734375 | 0.2984375
> 8 | 0.5692838430404663 | 0.9367902278900146 | 0.6734375 | 0.2984375
> 9 | 0.5664511919021606 | 0.9243993163108826 | 0.6734375 | 0.2984375
> 10 | 0.5636833906173706 | 0.9123280048370361 | 0.6734375 | 0.2984375
> 11 | 0.5609760880470276 | 0.9005568027496338 | 0.6734375 | 0.2984375
> 12 | 0.5583276748657227 | 0.8890718221664429 | 0.6734375 | 0.2984375
> 13 | 0.5557384490966797 | 0.8778634071350098 | 0.6734375 | 0.2984375
> 14 | 0.5532094240188599 | 0.86

### Zhang for Eq Opp

In [10]:
# Train and evaluate FairLogisticRegression with fairdef 'EqOpp' (the "Zhang for Eq Opp"
# configuration) once per seed in `cv_seeds`, appending one row per run to `results`.
fairdef = 'EqOpp'

for cv_seed in cv_seeds:
    # Seed TensorFlow's global generator too, so a run is repeatable for a given
    # cv_seed and not just the train/test split.
    tf.random.set_seed(cv_seed)

    # 70/30 split of the three aligned arrays, controlled by the current seed.
    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed)

    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train))
    train_data = train_data.batch(batch_size, drop_remainder=True)

    # NOTE(review): drop_remainder=True also discards the final partial batch of the
    # *test* split, so the metrics below do not cover every held-out sample. It is
    # kept because the model is constructed with a fixed batch_size -- confirm
    # whether evaluation can accept a smaller last batch.
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
    test_data = test_data.batch(batch_size, drop_remainder=True)

    # Fresh optimizer and model for every seed.
    opt = Adam(learning_rate=lr)
    model = FairLogisticRegression(xdim, ydim, adim, batch_size, fairdef)
    # NOTE(review): raw_data is the unsplit dataset (test rows included) -- verify
    # how train_loop uses it.
    zhang_train(model, raw_data, train_data, epochs, opt)

    Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
    # confusion_matrix is returned by compute_metrics but not used in this cell.
    clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(Y, A, Y_hat, A_hat, adim)

    # One accuracy/fairness trade-off value per fairness metric, in the same order.
    fair_metrics = (dp, deqodds, deqopp)
    tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

    # Row layout follows `header`; metrics_a0 / metrics_a1 are concatenated as the
    # per-group TN/FP/FN/TP columns.
    result = ['Zhang4EqOpp', cv_seed, clas_acc, *fair_metrics, *tradeoff] + metrics_a0 + metrics_a1

    results.append(result)

    # Drop the reference to this run's optimizer before the next iteration.
    del opt

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.5919795036315918 | 0.7619445323944092 | 0.6734375 | 0.2984375
> 2 | 0.5888176560401917 | 0.7507501244544983 | 0.6734375 | 0.2984375
> 3 | 0.5854759216308594 | 0.739605724811554 | 0.6734375 | 0.2984375
> 4 | 0.5822166204452515 | 0.7287383079528809 | 0.6734375 | 0.2984375
> 5 | 0.5791046619415283 | 0.7182154059410095 | 0.6734375 | 0.2984375
> 6 | 0.5761302709579468 | 0.7080291509628296 | 0.6734375 | 0.2984375
> 7 | 0.5732675194740295 | 0.6981483697891235 | 0.6734375 | 0.2984375
> 8 | 0.5704933404922485 | 0.6885406970977783 | 0.6734375 | 0.2984375
> 9 | 0.5677918791770935 | 0.679179310798645 | 0.6734375 | 0.2984375
> 10 | 0.5651538372039795 | 0.6700440645217896 | 0.6734375 | 0.2984375
> 11 | 0.5625743865966797 | 0.6611207127571106 | 0.6734375 | 0.2984375
> 12 | 0.560051679611206 | 0.6523997783660889 | 0.6734375 | 0.2984375
> 13 | 0.5575857162475586 | 0.6438751220703125 | 0.6734375 | 0.2984375
> 14 | 0.5551770925521851 | 0.63554

## Saving into DF then CSV

In [11]:
# Assemble every collected run into a single table, labelling the columns with `header`.
result_df = pd.DataFrame(data=results, columns=header)
result_df  # bare last expression so the notebook renders the table

Unnamed: 0,model_name,cv_seed,clas_acc,dp,deqodds,deqopp,trade_dp,trade_deqodds,trade_deqopp,TN_a0,FP_a0,FN_a0,TP_a0,TN_a1,FP_a1,FN_a1,TP_a1
0,Zhang4DP,13,0.71875,0.781285,0.725085,0.895225,0.748714,0.721904,0.79734,19.0,7.0,15.0,43.0,12.0,30.0,20.0,110.0
1,Zhang4DP,29,0.714844,0.88672,0.922413,0.947767,0.791559,0.805471,0.81499,15.0,21.0,6.0,37.0,16.0,35.0,11.0,115.0
2,Zhang4DP,42,0.714844,0.861152,0.820804,0.923077,0.781207,0.764168,0.805724,10.0,12.0,8.0,44.0,9.0,43.0,10.0,120.0
3,Zhang4DP,55,0.714844,0.874156,0.84699,0.92475,0.786513,0.775327,0.80636,12.0,14.0,9.0,48.0,12.0,40.0,10.0,111.0
4,Zhang4DP,73,0.746094,0.952728,0.973723,0.965628,0.836844,0.844844,0.841783,9.0,21.0,5.0,44.0,14.0,30.0,9.0,124.0
5,Zhang4EqOdds,13,0.722656,0.76938,0.705854,0.895225,0.745286,0.714157,0.799737,20.0,6.0,15.0,43.0,12.0,30.0,20.0,110.0
6,Zhang4EqOdds,29,0.71875,0.86977,0.904672,0.931894,0.787081,0.801065,0.81156,15.0,21.0,6.0,37.0,15.0,36.0,9.0,117.0
7,Zhang4EqOdds,42,0.710938,0.893674,0.856993,0.95,0.791901,0.777163,0.813264,9.0,13.0,7.0,45.0,9.0,43.0,11.0,119.0
8,Zhang4EqOdds,55,0.71875,0.874643,0.842015,0.934029,0.78907,0.775515,0.812369,12.0,14.0,8.0,49.0,11.0,41.0,9.0,112.0
9,Zhang4EqOdds,73,0.746094,0.952728,0.973723,0.965628,0.836844,0.844844,0.841783,9.0,21.0,5.0,44.0,14.0,30.0,9.0,124.0


In [12]:
# Write the collected results to '<data_name>-result/zhang-<epochs>.csv'.
out_path = Path(f'{data_name}-result/zhang-{epochs}.csv')
# Create the target directory when it is missing, so the export cannot fail after
# the long training cells merely because the folder does not exist yet.
out_path.parent.mkdir(parents=True, exist_ok=True)
# The DataFrame index is still written as the first column (pandas default), as before.
result_df.to_csv(out_path)