# Testing file 
### where we evaluate Zhang's models using the test set

## Preliminaries

In [1]:
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tensorflow.data import Dataset

from util.load_data import load_data
from util.evaluation import *
from models.zhang.models import FairLogisticRegression
from models.zhang.learning import train_loop as zhang_train

In [2]:
# Training hyperparameters shared by every run in this notebook.
batch_size = 64  # mini-batch size of the train/test pipelines; also passed to the model constructor
epochs = 100  # passed to the training loop and embedded in the output CSV file name
lr = 0.001  # learning rate handed to the Adam optimizer

In [3]:
# Each seed is used as the random_state of one train/test split, so every model is run once per seed.
cv_seeds = [13, 29, 42, 55, 73]

## Load data

In [4]:
# Dataset identifier: passed to load_data and used as the prefix of the results directory name.
data_name = 'adult'

In [5]:
# Load the three arrays that make up the dataset.
# presumably x = features, y = target labels, a = sensitive attribute — TODO confirm against util.load_data
x, y, a = load_data(data_name)
# Keep the unsplit triple around: the training loop takes it as a separate argument.
raw_data = (x, y, a)

In [6]:
# Size of the second axis of each loaded array; all three are passed to the model constructor.
xdim, ydim, adim = x.shape[1], y.shape[1], a.shape[1]
# NOTE(review): zdim is not referenced by any other cell visible in this notebook — confirm it is still needed.
zdim = 8

## Result file

In [7]:
# Column names of the results table, in the same order in which each result row is assembled.
header = (
    # run identification
    "model_name", "cv_seed",
    # accuracy and the three fairness metrics
    "clas_acc", "dp", "deqodds", "deqopp",
    # accuracy/fairness trade-off, one per fairness metric
    "trade_dp", "trade_deqodds", "trade_deqopp",
    # per-group confusion-matrix entries
    "TN_a0", "FP_a0", "FN_a0", "TP_a0",
    "TN_a1", "FP_a1", "FN_a1", "TP_a1",
)
# The testing loops append one row per (model, cv_seed) run to this list.
results = list()

## Testing loop
#### Each model is evaluated 5 times
#### At the end of each iteration we save the result

### Zhang for DP

In [8]:
# Fairness definition handed to the model constructor for this experiment.
fairdef = 'DemPar'

for cv_seed in cv_seeds:
    # New 70/30 split for every seed. In this cell cv_seed is used only for
    # the split and as a label in the result row.
    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed
    )

    # Both pipelines keep full batches only (drop_remainder=True), so up to
    # batch_size - 1 trailing samples of each split are left out.
    # NOTE(review): this also drops test samples from the evaluation — confirm
    # the model really needs a fixed batch size.
    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train)).batch(
        batch_size, drop_remainder=True
    )
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test)).batch(
        batch_size, drop_remainder=True
    )

    # A fresh optimizer and a fresh model for every run.
    opt = Adam(learning_rate=lr)
    model = FairLogisticRegression(xdim, ydim, adim, batch_size, fairdef)
    # NOTE(review): raw_data is the full, unsplit dataset (it includes the test
    # rows) — confirm how train_loop uses it.
    zhang_train(model, raw_data, train_data, epochs, opt)

    # Evaluate on the held-out split.
    Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
    (clas_acc, dp, deqodds, deqopp,
     confusion_matrix, metrics_a0, metrics_a1) = compute_metrics(Y, A, Y_hat, A_hat, adim)

    # One trade-off value per fairness metric, in the same order as fair_metrics.
    fair_metrics = (dp, deqodds, deqopp)
    tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

    # Row layout must match the header columns.
    result = ['Zhang4DP', cv_seed, clas_acc, *fair_metrics, *tradeoff] + metrics_a0 + metrics_a1
    results.append(result)

    del opt

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.4150388836860657 | 0.746207594871521 | 0.6860766700404858 | 0.32518345141700405
> 2 | 0.3644813299179077 | 0.659429132938385 | 0.8214195344129555 | 0.47918775303643724
> 3 | 0.3455881178379059 | 0.6233700513839722 | 0.8278087044534413 | 0.7169787449392713
> 4 | 0.33521145582199097 | 0.6088754534721375 | 0.8318572874493927 | 0.6871837044534413
> 5 | 0.32871943712234497 | 0.6031975746154785 | 0.8343560222672065 | 0.6767143218623481
> 6 | 0.3243919610977173 | 0.6010741591453552 | 0.835463056680162 | 0.6753858805668016
> 7 | 0.32138144969940186 | 0.6004577279090881 | 0.8373292004048583 | 0.675290991902834
> 8 | 0.31921055912971497 | 0.6005649566650391 | 0.8389423076923077 | 0.6757970647773279
> 9 | 0.3175855576992035 | 0.6011413931846619 | 0.8395432692307693 | 0.6769673582995951
> 10 | 0.31632164120674133 | 0.6020426750183105 | 0.8406819331983806 | 0.6777580971659919
> 11 | 0.31530165672302246 | 0.6032149195671082 | 0.8416308198

### Zhang for Eq Odds

In [9]:
# Fairness definition handed to the model constructor for this experiment.
fairdef = 'EqOdds'

for cv_seed in cv_seeds:
    # New 70/30 split for every seed. In this cell cv_seed is used only for
    # the split and as a label in the result row.
    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed
    )

    # Both pipelines keep full batches only (drop_remainder=True), so up to
    # batch_size - 1 trailing samples of each split are left out.
    # NOTE(review): this also drops test samples from the evaluation — confirm
    # the model really needs a fixed batch size.
    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train)).batch(
        batch_size, drop_remainder=True
    )
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test)).batch(
        batch_size, drop_remainder=True
    )

    # A fresh optimizer and a fresh model for every run.
    opt = Adam(learning_rate=lr)
    model = FairLogisticRegression(xdim, ydim, adim, batch_size, fairdef)
    # NOTE(review): raw_data is the full, unsplit dataset (it includes the test
    # rows) — confirm how train_loop uses it.
    zhang_train(model, raw_data, train_data, epochs, opt)

    # Evaluate on the held-out split.
    Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
    (clas_acc, dp, deqodds, deqopp,
     confusion_matrix, metrics_a0, metrics_a1) = compute_metrics(Y, A, Y_hat, A_hat, adim)

    # One trade-off value per fairness metric, in the same order as fair_metrics.
    fair_metrics = (dp, deqodds, deqopp)
    tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

    # Row layout must match the header columns.
    result = ['Zhang4EqOdds', cv_seed, clas_acc, *fair_metrics, *tradeoff] + metrics_a0 + metrics_a1
    results.append(result)

    del opt

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.4332188367843628 | 0.6937553286552429 | 0.6973051619433198 | 0.3402074898785425
> 2 | 0.37514346837997437 | 0.609142541885376 | 0.8218939777327935 | 0.6475202429149798
> 3 | 0.35245925188064575 | 0.5828326940536499 | 0.8273026315789473 | 0.7266573886639676
> 4 | 0.3406107425689697 | 0.5754573345184326 | 0.8298962550607287 | 0.7304213056680162
> 5 | 0.33302468061447144 | 0.575050950050354 | 0.832268471659919 | 0.7260564271255061
> 6 | 0.3278496265411377 | 0.5773186683654785 | 0.8343243927125507 | 0.7206477732793523
> 7 | 0.32416990399360657 | 0.580694317817688 | 0.8362854251012146 | 0.719730516194332
> 8 | 0.32146432995796204 | 0.5845776796340942 | 0.8374873481781376 | 0.7202365890688259
> 9 | 0.3194071352481842 | 0.5886556506156921 | 0.8385943825910931 | 0.7215334008097166
> 10 | 0.31779399514198303 | 0.5927678346633911 | 0.8395748987854251 | 0.7226720647773279
> 11 | 0.3164890706539154 | 0.596808910369873 | 0.84093496963562

### Zhang for Eq Opp

In [10]:
# Fairness definition handed to the model constructor for this experiment.
fairdef = 'EqOpp'

for cv_seed in cv_seeds:
    # New 70/30 split for every seed. In this cell cv_seed is used only for
    # the split and as a label in the result row.
    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed
    )

    # Both pipelines keep full batches only (drop_remainder=True), so up to
    # batch_size - 1 trailing samples of each split are left out.
    # NOTE(review): this also drops test samples from the evaluation — confirm
    # the model really needs a fixed batch size.
    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train)).batch(
        batch_size, drop_remainder=True
    )
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test)).batch(
        batch_size, drop_remainder=True
    )

    # A fresh optimizer and a fresh model for every run.
    opt = Adam(learning_rate=lr)
    model = FairLogisticRegression(xdim, ydim, adim, batch_size, fairdef)
    # NOTE(review): raw_data is the full, unsplit dataset (it includes the test
    # rows) — confirm how train_loop uses it.
    zhang_train(model, raw_data, train_data, epochs, opt)

    # Evaluate on the held-out split.
    Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
    (clas_acc, dp, deqodds, deqopp,
     confusion_matrix, metrics_a0, metrics_a1) = compute_metrics(Y, A, Y_hat, A_hat, adim)

    # One trade-off value per fairness metric, in the same order as fair_metrics.
    fair_metrics = (dp, deqodds, deqopp)
    tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

    # Row layout must match the header columns.
    result = ['Zhang4EqOpp', cv_seed, clas_acc, *fair_metrics, *tradeoff] + metrics_a0 + metrics_a1
    results.append(result)

    del opt

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.4167071282863617 | 0.1535322368144989 | 0.6865827429149798 | 0.3282831477732793
> 2 | 0.36870408058166504 | 0.10809452831745148 | 0.8226530870445344 | 0.471185475708502
> 3 | 0.3498004376888275 | 0.09275442361831665 | 0.8279035931174089 | 0.5763537449392713
> 4 | 0.3390975296497345 | 0.08908288180828094 | 0.8314777327935222 | 0.6332553137651822
> 5 | 0.3320850133895874 | 0.09034185111522675 | 0.8341029858299596 | 0.6704832995951417
> 6 | 0.3272702097892761 | 0.09237869083881378 | 0.8366966093117408 | 0.6903782894736842
> 7 | 0.32396677136421204 | 0.09400853514671326 | 0.8381515688259109 | 0.6930668016194332
> 8 | 0.32160472869873047 | 0.09561667591333389 | 0.8394483805668016 | 0.6934779858299596
> 9 | 0.31985610723495483 | 0.09700166434049606 | 0.8402707489878543 | 0.6925923582995951
> 10 | 0.31851208209991455 | 0.09807877987623215 | 0.8406186740890689 | 0.6921811740890689
> 11 | 0.3174431324005127 | 0.09891863167285919 | 0.

## Saving into DF then CSV

In [11]:
# Assemble every collected result row into one table, labelled with the header columns.
result_df = pd.DataFrame(results, columns=header)
# Bare last expression so the notebook renders the table as the cell output.
result_df

Unnamed: 0,model_name,cv_seed,clas_acc,dp,deqodds,deqopp,trade_dp,trade_deqodds,trade_deqopp,TN_a0,FP_a0,FN_a0,TP_a0,TN_a1,FP_a1,FN_a1,TP_a1
0,Zhang4DP,13,0.849378,0.811971,0.898506,0.878278,0.830253,0.873251,0.863586,3786.0,83.0,252.0,259.0,5643.0,646.0,1053.0,1782.0
1,Zhang4DP,29,0.850267,0.804985,0.894028,0.873792,0.827006,0.871598,0.861869,3796.0,82.0,246.0,260.0,5607.0,671.0,1023.0,1819.0
2,Zhang4DP,42,0.85545,0.810959,0.911109,0.898743,0.832611,0.882403,0.876562,3841.0,84.0,230.0,269.0,5610.0,609.0,1029.0,1832.0
3,Zhang4DP,55,0.850637,0.813102,0.899535,0.872677,0.831446,0.874403,0.861516,3761.0,88.0,249.0,250.0,5657.0,604.0,1076.0,1819.0
4,Zhang4DP,73,0.847675,0.799658,0.893716,0.871354,0.822967,0.870087,0.859351,3779.0,90.0,235.0,243.0,5556.0,667.0,1065.0,1869.0
5,Zhang4EqOdds,13,0.84997,0.811158,0.897152,0.875968,0.830111,0.872924,0.862774,3790.0,79.0,253.0,258.0,5647.0,642.0,1052.0,1783.0
6,Zhang4EqOdds,29,0.849896,0.803761,0.892257,0.871464,0.826185,0.870562,0.860545,3797.0,81.0,247.0,259.0,5601.0,677.0,1022.0,1820.0
7,Zhang4EqOdds,42,0.855302,0.810078,0.911152,0.900048,0.832076,0.882344,0.877105,3842.0,83.0,229.0,270.0,5604.0,615.0,1027.0,1834.0
8,Zhang4EqOdds,55,0.850785,0.813079,0.897851,0.868669,0.831505,0.873684,0.859634,3761.0,88.0,251.0,248.0,5661.0,600.0,1076.0,1819.0
9,Zhang4EqOdds,73,0.847379,0.799198,0.891714,0.867511,0.822583,0.868981,0.857326,3779.0,90.0,237.0,241.0,5555.0,668.0,1066.0,1868.0


In [12]:
# Persist the results table; the file name records the epoch budget of the runs.
# The output directory is created first so that a missing folder cannot make
# to_csv fail after all training runs have already completed.
output_dir = Path(f'{data_name}-result')
output_dir.mkdir(parents=True, exist_ok=True)
result_df.to_csv(output_dir / f'zhang-{epochs}.csv')