# Testing file 
### where we evaluate Zhang's models using the test set

## Preliminaries

In [1]:
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tensorflow.data import Dataset


from util.load_data import load_data
from util.evaluation import *
from models.zhang.models import FairLogisticRegression
from models.zhang.learning import train_loop as zhang_train

In [2]:
# Hyperparameters shared by every run in this notebook.
batch_size = 64  # batch size of the train/test datasets; also handed to the model constructor
epochs = 100     # forwarded to the training loop and embedded in the output CSV name
lr = 1e-3        # learning rate given to the Adam optimizer

In [3]:
# One seed per repetition: each value is passed as `random_state` to
# `train_test_split` in the testing loops, so every model sees the same five splits.
cv_seeds = [13, 29, 42, 55, 73]

## Load data

In [4]:
# Dataset identifier: given to `load_data` and used as the prefix of the
# results folder when the CSV is written.
data_name = 'titanic'

In [5]:
# Load the three arrays once and keep them both bundled (the training loop
# receives the bundle) and individually named (used for splitting below).
raw_data = tuple(load_data(data_name))
x, y, a = raw_data

In [6]:
# Second-axis size of each loaded array; these are the dimensions handed to
# the model constructor (and `adim` to `compute_metrics`).
xdim, ydim, adim = (array.shape[1] for array in (x, y, a))

# NOTE(review): zdim is not referenced by any cell visible in this notebook.
zdim = 8

## Result file

In [7]:
# Column names of the results table, in the exact order each result row is built.
header = (
    # run identification
    "model_name", "cv_seed",
    # accuracy and fairness metrics
    "clas_acc", "dp", "deqodds", "deqopp",
    # accuracy/fairness trade-off, one per fairness metric
    "trade_dp", "trade_deqodds", "trade_deqopp",
    # per-group confusion counts
    "TN_a0", "FP_a0", "FN_a0", "TP_a0",
    "TN_a1", "FP_a1", "FN_a1", "TP_a1",
)

# Accumulates one row per (model, cv_seed) run across the testing loops.
results = []

## Testing loop
#### Each model is evaluated 5 times (once per CV seed)
#### At the end of each iteration we save the result

### Zhang for DP

In [8]:
fairdef = 'DemPar'

for cv_seed in cv_seeds:
    # Fresh 70/30 split for this repetition; the seed fixes the partition.
    (x_train, x_test,
     y_train, y_test,
     a_train, a_test) = train_test_split(x, y, a, test_size=0.3, random_state=cv_seed)

    # Batched datasets for training and evaluation.
    # NOTE(review): drop_remainder=True also discards the final partial batch
    # of the *test* split, so metrics cover full batches only. Confirm whether
    # the model really requires fixed-size batches.
    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train)).batch(
        batch_size, drop_remainder=True)
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test)).batch(
        batch_size, drop_remainder=True)

    # New optimizer and model for every repetition.
    opt = Adam(learning_rate=lr)
    model = FairLogisticRegression(xdim, ydim, adim, batch_size, fairdef)
    zhang_train(model, raw_data, train_data, epochs, opt)

    # Evaluate on the held-out split and derive accuracy/fairness metrics.
    Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
    clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(
        Y, A, Y_hat, A_hat, adim)

    # One trade-off value per fairness metric, in the same order.
    fair_metrics = (dp, deqodds, deqopp)
    tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

    # Row layout must match `header`.
    result = ['Zhang4DP', cv_seed, clas_acc, *fair_metrics, *tradeoff] + metrics_a0 + metrics_a1
    results.append(result)

    del opt

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.8991293907165527 | 0.9478432536125183 | 0.3645833333333333 | 0.3506944444444444
> 2 | 0.8805515766143799 | 0.9411742687225342 | 0.3454861111111111 | 0.3506944444444444
> 3 | 0.8636325597763062 | 0.9344364404678345 | 0.3420138888888889 | 0.3506944444444444
> 4 | 0.8481394648551941 | 0.9276650547981262 | 0.3541666666666667 | 0.3506944444444444
> 5 | 0.8311158418655396 | 0.9212372303009033 | 0.359375 | 0.3506944444444444
> 6 | 0.8138572573661804 | 0.9152399301528931 | 0.3663194444444444 | 0.3506944444444444
> 7 | 0.7983086109161377 | 0.9094412922859192 | 0.3697916666666667 | 0.3506944444444444
> 8 | 0.7838771939277649 | 0.9038481712341309 | 0.375 | 0.3506944444444444
> 9 | 0.769904375076294 | 0.8986423015594482 | 0.3871527777777778 | 0.3506944444444444
> 10 | 0.756597638130188 | 0.8937938213348389 | 0.3993055555555556 | 0.3506944444444444
> 11 | 0.7441437244415283 | 0.8892005681991577 | 0.40625 | 0.3506944444444444
> 12 | 0.732

### Zhang for Eq Odds

In [9]:
fairdef = 'EqOdds'

for cv_seed in cv_seeds:
    # Fresh 70/30 split for this repetition; the seed fixes the partition.
    (x_train, x_test,
     y_train, y_test,
     a_train, a_test) = train_test_split(x, y, a, test_size=0.3, random_state=cv_seed)

    # Batched datasets for training and evaluation.
    # NOTE(review): drop_remainder=True also discards the final partial batch
    # of the *test* split, so metrics cover full batches only. Confirm whether
    # the model really requires fixed-size batches.
    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train)).batch(
        batch_size, drop_remainder=True)
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test)).batch(
        batch_size, drop_remainder=True)

    # New optimizer and model for every repetition.
    opt = Adam(learning_rate=lr)
    model = FairLogisticRegression(xdim, ydim, adim, batch_size, fairdef)
    zhang_train(model, raw_data, train_data, epochs, opt)

    # Evaluate on the held-out split and derive accuracy/fairness metrics.
    Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
    clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(
        Y, A, Y_hat, A_hat, adim)

    # One trade-off value per fairness metric, in the same order.
    fair_metrics = (dp, deqodds, deqopp)
    tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

    # Row layout must match `header`.
    result = ['Zhang4EqOdds', cv_seed, clas_acc, *fair_metrics, *tradeoff] + metrics_a0 + metrics_a1
    results.append(result)

    del opt

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.8991373777389526 | 0.9453623294830322 | 0.3645833333333333 | 0.3506944444444444
> 2 | 0.8805599212646484 | 0.9360097050666809 | 0.3454861111111111 | 0.3506944444444444
> 3 | 0.8636873960494995 | 0.9262063503265381 | 0.3420138888888889 | 0.3506944444444444
> 4 | 0.8482683897018433 | 0.9162303805351257 | 0.3541666666666667 | 0.3506944444444444
> 5 | 0.8312630653381348 | 0.9069061279296875 | 0.359375 | 0.3506944444444444
> 6 | 0.8140367269515991 | 0.8983746767044067 | 0.3663194444444444 | 0.3506944444444444
> 7 | 0.7985506653785706 | 0.8901621103286743 | 0.3697916666666667 | 0.3506944444444444
> 8 | 0.7841881513595581 | 0.882300853729248 | 0.375 | 0.3506944444444444
> 9 | 0.7702751159667969 | 0.8751462697982788 | 0.3888888888888889 | 0.3506944444444444
> 10 | 0.7570350170135498 | 0.8686204552650452 | 0.3993055555555556 | 0.3506944444444444
> 11 | 0.7446549534797668 | 0.8625203371047974 | 0.40625 | 0.3506944444444444
> 12 | 0.73

### Zhang for Eq Opp

In [10]:
fairdef = 'EqOpp'

for cv_seed in cv_seeds:
    # Fresh 70/30 split for this repetition; the seed fixes the partition.
    (x_train, x_test,
     y_train, y_test,
     a_train, a_test) = train_test_split(x, y, a, test_size=0.3, random_state=cv_seed)

    # Batched datasets for training and evaluation.
    # NOTE(review): drop_remainder=True also discards the final partial batch
    # of the *test* split, so metrics cover full batches only. Confirm whether
    # the model really requires fixed-size batches.
    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train)).batch(
        batch_size, drop_remainder=True)
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test)).batch(
        batch_size, drop_remainder=True)

    # New optimizer and model for every repetition.
    opt = Adam(learning_rate=lr)
    model = FairLogisticRegression(xdim, ydim, adim, batch_size, fairdef)
    zhang_train(model, raw_data, train_data, epochs, opt)

    # Evaluate on the held-out split and derive accuracy/fairness metrics.
    Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
    clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(
        Y, A, Y_hat, A_hat, adim)

    # One trade-off value per fairness metric, in the same order.
    fair_metrics = (dp, deqodds, deqopp)
    tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

    # Row layout must match `header`.
    result = ['Zhang4EqOpp', cv_seed, clas_acc, *fair_metrics, *tradeoff] + metrics_a0 + metrics_a1
    results.append(result)

    del opt

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.900621771812439 | 0.22676822543144226 | 0.3645833333333333 | 0.3506944444444444
> 2 | 0.8830764293670654 | 0.22638536989688873 | 0.3454861111111111 | 0.3506944444444444
> 3 | 0.8670850992202759 | 0.22608187794685364 | 0.3420138888888889 | 0.3506944444444444
> 4 | 0.8523851633071899 | 0.22584685683250427 | 0.3524305555555556 | 0.3506944444444444
> 5 | 0.8361303210258484 | 0.2256619930267334 | 0.3611111111111111 | 0.3506944444444444
> 6 | 0.8195074796676636 | 0.22551551461219788 | 0.3663194444444444 | 0.3506944444444444
> 7 | 0.8045295476913452 | 0.2254023253917694 | 0.3680555555555556 | 0.3506944444444444
> 8 | 0.7906621694564819 | 0.22531771659851074 | 0.3767361111111111 | 0.3506944444444444
> 9 | 0.777219295501709 | 0.22525405883789062 | 0.3836805555555556 | 0.3506944444444444
> 10 | 0.7643619775772095 | 0.22520726919174194 | 0.3975694444444444 | 0.3506944444444444
> 11 | 0.7523151636123657 | 0.22517456114292145 | 0.4079861

## Saving into DF then CSV

In [11]:
# Turn the accumulated rows into a table, labelling columns with `header`,
# and display it as the cell output.
result_df = pd.DataFrame(data=results, columns=header)
result_df

Unnamed: 0,model_name,cv_seed,clas_acc,dp,deqodds,deqopp,trade_dp,trade_deqodds,trade_deqopp,TN_a0,FP_a0,FN_a0,TP_a0,TN_a1,FP_a1,FN_a1,TP_a1
0,Zhang4DP,13,0.722656,0.532001,0.660861,0.540952,0.612843,0.690378,0.61874,12.0,4.0,32.0,43.0,126.0,4.0,31.0,4.0
1,Zhang4DP,29,0.804688,0.448564,0.61787,0.479508,0.576027,0.699012,0.600927,18.0,7.0,14.0,47.0,133.0,5.0,24.0,8.0
2,Zhang4DP,42,0.753906,0.524609,0.634297,0.580282,0.618696,0.688949,0.655797,18.0,10.0,27.0,44.0,126.0,6.0,20.0,5.0
3,Zhang4DP,55,0.761719,0.533776,0.691561,0.525568,0.627694,0.724946,0.621983,16.0,4.0,22.0,42.0,131.0,8.0,27.0,6.0
4,Zhang4DP,73,0.792969,0.401515,0.567789,0.430743,0.533099,0.661747,0.558246,16.0,8.0,12.0,52.0,126.0,5.0,28.0,9.0
5,Zhang4EqOdds,13,0.722656,0.532001,0.660861,0.540952,0.612843,0.690378,0.61874,12.0,4.0,32.0,43.0,126.0,4.0,31.0,4.0
6,Zhang4EqOdds,29,0.804688,0.448564,0.61787,0.479508,0.576027,0.699012,0.600927,18.0,7.0,14.0,47.0,133.0,5.0,24.0,8.0
7,Zhang4EqOdds,42,0.75,0.530979,0.638085,0.580282,0.621765,0.689531,0.654314,18.0,10.0,27.0,44.0,125.0,7.0,20.0,5.0
8,Zhang4EqOdds,55,0.757812,0.521872,0.666561,0.525568,0.618091,0.709264,0.620677,15.0,5.0,22.0,42.0,131.0,8.0,27.0,6.0
9,Zhang4EqOdds,73,0.792969,0.401515,0.567789,0.430743,0.533099,0.661747,0.558246,16.0,8.0,12.0,52.0,126.0,5.0,28.0,9.0


In [12]:
# Persist the results table to '<data_name>-result/zhang-<epochs>.csv'.
# The folder is created first: `to_csv` does not create missing directories,
# and failing here would throw away the results of the whole training run.
output_dir = Path(f'{data_name}-result')
output_dir.mkdir(parents=True, exist_ok=True)
result_df.to_csv(output_dir / f'zhang-{epochs}.csv')