# Testing file 
### where we evaluate Zhang's models using the test set

## Preliminaries

In [1]:
import os

import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tensorflow.data import Dataset


from util.load_data import load_data
from util.evaluation import *
from models.zhang.models import FairLogisticRegression
from models.zhang.learning import train_loop as zhang_train

2022-07-16 15:44:00.054265: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
# Hyperparameters shared by every training run in this notebook.
batch_size = 64  # used to batch the datasets and passed to the model constructor
epochs = 100     # epoch count handed to the training loop
lr = 1e-3        # learning rate given to the Adam optimizer

In [3]:
# One seed per repetition; each one drives the train/test split of its run.
cv_seeds = [
    13,
    29,
    42,
    55,
    73,
]

## Load data

In [4]:
# Dataset identifier: passed to load_data and reused in the output folder name.
data_name = "titanic"

In [5]:
# Load the dataset once, keeping both the packed 3-tuple (handed to the
# training loop later) and its three unpacked components.
raw_data = tuple(load_data(data_name))
x, y, a = raw_data

In [6]:
# Second-axis size of each loaded array (x, y, a -- presumably features,
# labels and the sensitive attribute; confirm against load_data).
xdim, ydim, adim = (arr.shape[1] for arr in (x, y, a))
# NOTE(review): zdim is not referenced by any cell visible in this notebook --
# confirm whether it is still needed.
zdim = 8

## Result file

In [10]:
# Column names for the result table, grouped by meaning. The order must match
# the rows assembled inside the testing loops below.
header = (
    # run identification
    ("model_name", "cv_seed")
    # accuracy and the three fairness metrics
    + ("clas_acc", "dp", "deqodds", "deqopp")
    # accuracy/fairness trade-off per fairness metric
    + ("trade_dp", "trade_deqodds", "trade_deqopp")
    # per-group counts for a=0, then a=1
    + ("TN_a0", "FP_a0", "FN_a0", "TP_a0")
    + ("TN_a1", "FP_a1", "FN_a1", "TP_a1")
)
# Accumulates one row (a list) per trained model and seed.
results = list()

## Testing loop
#### Each model is evaluated 5 times
#### At the end of each iteration we save the result

### Zhang for DP

In [11]:
fairdef = 'DemPar'

# Train and evaluate one Zhang model per seed with the 'DemPar' fairness
# definition, appending one row to `results` for each seed.
for cv_seed in cv_seeds:
    # Seed TensorFlow's global RNG too: previously only the split was seeded,
    # so a row labelled with cv_seed could not be reproduced on a re-run.
    tf.random.set_seed(cv_seed)

    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed)

    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train))
    train_data = train_data.batch(batch_size, drop_remainder=True)

    # NOTE(review): drop_remainder=True also discards the final partial batch
    # of the test split, so some test rows are never scored. Presumably needed
    # because the model is built with a fixed batch_size -- confirm.
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
    test_data = test_data.batch(batch_size, drop_remainder=True)

    # train below

    opt = Adam(learning_rate=lr)

    model = FairLogisticRegression(xdim, ydim, adim, batch_size, fairdef)
    # NOTE(review): raw_data is the full, unsplit (x, y, a) tuple, test rows
    # included -- confirm the training loop does not fit on it.
    zhang_train(model, raw_data, train_data, epochs, opt)

    Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
    clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(Y, A, Y_hat, A_hat, adim)

    # One trade-off value per fairness metric, in header order.
    tradeoff = [compute_tradeoff(clas_acc, fair_metric)
                for fair_metric in (dp, deqodds, deqopp)]

    result = ['Zhang4DP', cv_seed, clas_acc, dp, deqodds, deqopp, tradeoff[0], tradeoff[1], tradeoff[2]] + metrics_a0 + metrics_a1

    results.append(result)

    # Drop the optimizer reference; a new one is created for the next seed.
    del opt

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.537175178527832 | 0.8347809910774231 | 0.6528514588859416 | 0.3241462201591512
> 2 | 0.48398250341415405 | 0.7067828178405762 | 0.8170590185676393 | 0.34801889920424406
> 3 | 0.46705368161201477 | 0.6304430365562439 | 0.8250994694960212 | 0.6238395225464191
> 4 | 0.45943623781204224 | 0.5849467515945435 | 0.83003149867374 | 0.7131548408488063
> 5 | 0.4555836617946625 | 0.5569688081741333 | 0.8327669098143236 | 0.6960792440318302
> 6 | 0.4535480737686157 | 0.5389748811721802 | 0.8340102785145889 | 0.6819877320954907
> 7 | 0.45262983441352844 | 0.5268917083740234 | 0.8359167771883289 | 0.6783819628647215
> 8 | 0.45246022939682007 | 0.5182324647903442 | 0.8369529177718833 | 0.6776359416445623
> 9 | 0.4527934491634369 | 0.5118634700775146 | 0.8376989389920424 | 0.6775116047745358
> 10 | 0.4534529745578766 | 0.5069818496704102 | 0.8389837533156499 | 0.6781747347480106
> 11 | 0.45430347323417664 | 0.5031645894050598 | 0.8398541114

### Zhang for Eq Odds

In [12]:
fairdef = 'EqOdds'

# Train and evaluate one Zhang model per seed with the 'EqOdds' fairness
# definition, appending one row to `results` for each seed.
for cv_seed in cv_seeds:
    # Seed TensorFlow's global RNG too: previously only the split was seeded,
    # so a row labelled with cv_seed could not be reproduced on a re-run.
    tf.random.set_seed(cv_seed)

    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed)

    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train))
    train_data = train_data.batch(batch_size, drop_remainder=True)

    # NOTE(review): drop_remainder=True also discards the final partial batch
    # of the test split, so some test rows are never scored. Presumably needed
    # because the model is built with a fixed batch_size -- confirm.
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
    test_data = test_data.batch(batch_size, drop_remainder=True)

    # train below

    opt = Adam(learning_rate=lr)

    model = FairLogisticRegression(xdim, ydim, adim, batch_size, fairdef)
    # NOTE(review): raw_data is the full, unsplit (x, y, a) tuple, test rows
    # included -- confirm the training loop does not fit on it.
    zhang_train(model, raw_data, train_data, epochs, opt)

    Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
    clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(Y, A, Y_hat, A_hat, adim)

    # One trade-off value per fairness metric, in header order.
    tradeoff = [compute_tradeoff(clas_acc, fair_metric)
                for fair_metric in (dp, deqodds, deqopp)]

    result = ['Zhang4EqOdds', cv_seed, clas_acc, dp, deqodds, deqopp, tradeoff[0], tradeoff[1], tradeoff[2]] + metrics_a0 + metrics_a1

    results.append(result)

    # Drop the optimizer reference; a new one is created for the next seed.
    del opt

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.5167679786682129 | 0.7780622243881226 | 0.6641661140583555 | 0.32418766578249336
> 2 | 0.47247856855392456 | 0.6386902928352356 | 0.8193385278514589 | 0.4995440981432361
> 3 | 0.46230050921440125 | 0.5677540302276611 | 0.8244777851458885 | 0.7143153183023873
> 4 | 0.4566110372543335 | 0.5320210456848145 | 0.8289953580901857 | 0.7214439655172413
> 5 | 0.45371437072753906 | 0.5124578475952148 | 0.8313992042440318 | 0.724676724137931
> 6 | 0.4522345960140228 | 0.5009664297103882 | 0.8336372679045093 | 0.7225215517241379
> 7 | 0.4516741931438446 | 0.4937453866004944 | 0.8351707559681698 | 0.7171750663129973
> 8 | 0.451744019985199 | 0.4890083968639374 | 0.8365799071618037 | 0.7148955570291777
> 9 | 0.4522421360015869 | 0.4856780171394348 | 0.8384449602122016 | 0.7140251989389921
> 10 | 0.453009694814682 | 0.4833061099052429 | 0.8392324270557029 | 0.7140251989389921
> 11 | 0.45392122864723206 | 0.4815048575401306 | 0.839688328912

### Zhang for Eq Opp

In [13]:
fairdef = 'EqOpp'

# Train and evaluate one Zhang model per seed with the 'EqOpp' fairness
# definition, appending one row to `results` for each seed.
for cv_seed in cv_seeds:
    # Seed TensorFlow's global RNG too: previously only the split was seeded,
    # so a row labelled with cv_seed could not be reproduced on a re-run.
    tf.random.set_seed(cv_seed)

    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed)

    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train))
    train_data = train_data.batch(batch_size, drop_remainder=True)

    # NOTE(review): drop_remainder=True also discards the final partial batch
    # of the test split, so some test rows are never scored. Presumably needed
    # because the model is built with a fixed batch_size -- confirm.
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
    test_data = test_data.batch(batch_size, drop_remainder=True)

    # train below

    opt = Adam(learning_rate=lr)

    model = FairLogisticRegression(xdim, ydim, adim, batch_size, fairdef)
    # NOTE(review): raw_data is the full, unsplit (x, y, a) tuple, test rows
    # included -- confirm the training loop does not fit on it.
    zhang_train(model, raw_data, train_data, epochs, opt)

    Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
    clas_acc, dp, deqodds, deqopp, confusion_matrix, metrics_a0, metrics_a1 = compute_metrics(Y, A, Y_hat, A_hat, adim)

    # One trade-off value per fairness metric, in header order.
    tradeoff = [compute_tradeoff(clas_acc, fair_metric)
                for fair_metric in (dp, deqodds, deqopp)]

    result = ['Zhang4EqOpp', cv_seed, clas_acc, dp, deqodds, deqopp, tradeoff[0], tradeoff[1], tradeoff[2]] + metrics_a0 + metrics_a1

    results.append(result)

    # Drop the optimizer reference; a new one is created for the next seed.
    del opt

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.5352369546890259 | 0.3454836308956146 | 0.6534316976127321 | 0.3241462201591512
> 2 | 0.4807005226612091 | 0.2549954652786255 | 0.8173905835543767 | 0.3957228116710875
> 3 | 0.4652402698993683 | 0.21041643619537354 | 0.8256382625994695 | 0.5182775198938993
> 4 | 0.45960766077041626 | 0.18977224826812744 | 0.8291196949602122 | 0.5750165782493368
> 5 | 0.4571208953857422 | 0.17857737839221954 | 0.8316478779840849 | 0.6185759283819628
> 6 | 0.45613378286361694 | 0.1713542342185974 | 0.834051724137931 | 0.6511521883289124
> 7 | 0.45593687891960144 | 0.16730940341949463 | 0.8348806366047745 | 0.6744446286472149
> 8 | 0.45616504549980164 | 0.16622084379196167 | 0.8359167771883289 | 0.6803713527851459
> 9 | 0.45682814717292786 | 0.16606619954109192 | 0.8370358090185677 | 0.6821120689655172
> 10 | 0.457761287689209 | 0.1664421409368515 | 0.8385692970822282 | 0.6828580901856764
> 11 | 0.4588456451892853 | 0.16705381870269775 | 0.8392

## Saving into DF then CSV

In [14]:
# Turn the accumulated rows into a table whose columns follow `header`,
# then show it as the cell output.
result_df = pd.DataFrame(data=results, columns=header)
result_df

Unnamed: 0,model_name,clas_acc,dp,deqodds,deqopp,trade_dp,trade_deqodds,trade_deqopp,TN_a0,FP_a0,FN_a0,TP_a0,TN_a1,FP_a1,FN_a1,TP_a1
0,Zhang4DP,0.850066,0.811667,0.902038,0.880965,0.830423,0.875281,0.86524,1718.0,40.0,101.0,100.0,2521.0,279.0,482.0,775.0
1,Zhang4EqOdds,0.850399,0.81216,0.902395,0.880965,0.83084,0.875626,0.865412,1718.0,40.0,101.0,100.0,2523.0,277.0,482.0,775.0
2,Zhang4EqOpp,0.850399,0.816139,0.906203,0.884943,0.832917,0.877414,0.867327,1716.0,42.0,101.0,100.0,2530.0,270.0,487.0,770.0


In [15]:
# Make sure the per-dataset output folder exists before writing: to_csv does
# not create missing directories, and failing here would lose the results of
# every training run above.
out_dir = f'{data_name}-result'
os.makedirs(out_dir, exist_ok=True)
result_df.to_csv(f'{out_dir}/zhang-{epochs}.csv')