# Testing file 
### where we evaluate Zhang's models using the test set

## Preliminaries

In [1]:
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tensorflow.data import Dataset

from util.load_data import load_data
from util.evaluation import *
from models.zhang.models import FairLogisticRegression
from models.zhang.learning import train_loop as zhang_train

In [2]:
# Training hyper-parameters shared by every run in this notebook.
batch_size, epochs = 64, 100
lr = 1e-3
# Module-level optimizer; each testing loop below builds its own fresh Adam instance.
opt = Adam(learning_rate=lr)

In [3]:
# Each seed is used as the random_state of one train/test split in the testing loops.
cv_seeds = [
    13,
    29,
    42,
    55,
    73,
]

## Load data

In [4]:
# Dataset key handed to load_data; also the prefix of the results directory.
data_name = "adult-race"

In [5]:
# load_data yields three objects; keep them bundled as a tuple (raw_data)
# and also expose them individually as x, y and a.
raw_data = tuple(load_data(data_name))
x, y, a = raw_data

In [6]:
# Widths (size of the second axis) of x, y and a; passed to the model constructor.
xdim, ydim, adim = (arr.shape[1] for arr in (x, y, a))
# zdim is not referenced by any other cell visible in this notebook.
zdim = 8

## Result file

In [7]:
# Column labels of the results table; every row appended to `results`
# must list its values in this order.
header = (
    "model_name",
    "cv_seed",
    "clas_acc",
    "dp",
    "deqodds",
    "deqopp",
    "trade_dp",
    "trade_deqodds",
    "trade_deqopp",
)
# Accumulates one row per (model, seed) run across the testing cells.
results = list()

## Testing loop
#### Each model is evaluated 5 times (once per CV seed)
#### At the end of each iteration we append the result to `results`

### Zhang for DP

In [8]:
# Train and evaluate the 'DemPar' variant once per CV seed, recording one row per seed.
fairdef = 'DemPar'
model_name = 'Zhang4DP'

# Make the cell idempotent: discard rows left by a previous execution of this
# cell so that re-running it replaces this model's results instead of
# appending duplicates to the shared `results` list.
results[:] = [row for row in results if row[0] != model_name]

for cv_seed in cv_seeds:
    # Fresh 70/30 split per seed; x, y and a are split together so rows stay aligned.
    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed)

    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train))
    train_data = train_data.batch(batch_size, drop_remainder=True)

    # NOTE(review): drop_remainder=True also discards the trailing partial batch
    # of the test split, so the metrics exclude those samples. Presumably needed
    # because the model is built for a fixed batch_size -- confirm.
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
    test_data = test_data.batch(batch_size, drop_remainder=True)

    # New optimizer and model for every seed.
    opt = Adam(learning_rate=lr)
    model = FairLogisticRegression(xdim, ydim, adim, batch_size, fairdef)
    # NOTE(review): raw_data is the full, unsplit dataset -- confirm the training
    # loop does not fit on rows that belong to the test split.
    zhang_train(model, raw_data, train_data, epochs, opt)

    Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
    # The confusion matrix is returned but not recorded in the results table.
    clas_acc, dp, deqodds, deqopp, confusion_matrix = compute_metrics(Y, A, Y_hat, A_hat, adim)

    fair_metrics = (dp, deqodds, deqopp)
    tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

    # Value order must match `header`.
    result = [model_name, cv_seed, clas_acc, *fair_metrics, *tradeoff]
    results.append(result)

    del opt

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.5062878131866455 | 1.3792097568511963 | 0.6793079453441295 | 0.03703820850202429
> 2 | 0.41056427359580994 | 1.2647855281829834 | 0.8180035425101214 | 0.044502783400809716
> 3 | 0.38041844964027405 | 1.1947553157806396 | 0.8277138157894737 | 0.17630313765182187
> 4 | 0.3622959554195404 | 1.1522235870361328 | 0.8309400303643725 | 0.28128162955465585
> 5 | 0.3505289852619171 | 1.125307321548462 | 0.8327745445344129 | 0.3479251012145749
> 6 | 0.34253060817718506 | 1.1082110404968262 | 0.8348937246963563 | 0.3848684210526316
> 7 | 0.3368930518627167 | 1.0975706577301025 | 0.8358109817813765 | 0.4089068825910931
> 8 | 0.33279094099998474 | 1.0914968252182007 | 0.8368863866396761 | 0.426271508097166
> 9 | 0.32971641421318054 | 1.0886415243148804 | 0.8378036437246964 | 0.4381958502024291
> 10 | 0.3273443579673767 | 1.0880119800567627 | 0.838119939271255 | 0.4486652327935223
> 11 | 0.3254619836807251 | 1.0890717506408691 | 0.8386260

### Zhang for Eq Odds

In [9]:
# Train and evaluate the 'EqOdds' variant once per CV seed, recording one row per seed.
fairdef = 'EqOdds'
model_name = 'Zhang4EqOdds'

# Make the cell idempotent: discard rows left by a previous execution of this
# cell so that re-running it replaces this model's results instead of
# appending duplicates to the shared `results` list.
results[:] = [row for row in results if row[0] != model_name]

for cv_seed in cv_seeds:
    # Fresh 70/30 split per seed; x, y and a are split together so rows stay aligned.
    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed)

    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train))
    train_data = train_data.batch(batch_size, drop_remainder=True)

    # NOTE(review): drop_remainder=True also discards the trailing partial batch
    # of the test split, so the metrics exclude those samples. Presumably needed
    # because the model is built for a fixed batch_size -- confirm.
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
    test_data = test_data.batch(batch_size, drop_remainder=True)

    # New optimizer and model for every seed.
    opt = Adam(learning_rate=lr)
    model = FairLogisticRegression(xdim, ydim, adim, batch_size, fairdef)
    # NOTE(review): raw_data is the full, unsplit dataset -- confirm the training
    # loop does not fit on rows that belong to the test split.
    zhang_train(model, raw_data, train_data, epochs, opt)

    Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
    # The confusion matrix is returned but not recorded in the results table.
    clas_acc, dp, deqodds, deqopp, confusion_matrix = compute_metrics(Y, A, Y_hat, A_hat, adim)

    fair_metrics = (dp, deqodds, deqopp)
    tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

    # Value order must match `header`.
    result = [model_name, cv_seed, clas_acc, *fair_metrics, *tradeoff]
    results.append(result)

    del opt

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.7597708106040955 | 0.8592720627784729 | 0.5248924595141701 | 0.1593496963562753
> 2 | 0.6748502850532532 | 0.609418511390686 | 0.5608236336032388 | 0.6907578441295547
> 3 | 0.5936461687088013 | 0.6569896936416626 | 0.6614056174089069 | 0.7263410931174089
> 4 | 0.4373527765274048 | 0.9148008823394775 | 0.7940915991902834 | 0.5520306174089069
> 5 | 0.37993115186691284 | 0.8894854187965393 | 0.8290422570850202 | 0.5291624493927125
> 6 | 0.3617430329322815 | 0.9166097640991211 | 0.8326796558704453 | 0.5486778846153846
> 7 | 0.3510138690471649 | 0.9358741044998169 | 0.834703947368421 | 0.5468117408906883
> 8 | 0.3437066674232483 | 0.951775312423706 | 0.8360323886639676 | 0.5466535931174089
> 9 | 0.33839720487594604 | 0.9653966426849365 | 0.8371394230769231 | 0.5472545546558705
> 10 | 0.33437687158584595 | 0.9773821830749512 | 0.8380883097165992 | 0.5495318825910931
> 11 | 0.3312371075153351 | 0.9876807332038879 | 0.83922697368421

### Zhang for Eq Opp

In [10]:
# Train and evaluate the 'EqOpp' variant once per CV seed, recording one row per seed.
fairdef = 'EqOpp'
model_name = 'Zhang4EqOpp'

# Make the cell idempotent: discard rows left by a previous execution of this
# cell so that re-running it replaces this model's results instead of
# appending duplicates to the shared `results` list.
results[:] = [row for row in results if row[0] != model_name]

for cv_seed in cv_seeds:
    # Fresh 70/30 split per seed; x, y and a are split together so rows stay aligned.
    x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
        x, y, a, test_size=0.3, random_state=cv_seed)

    train_data = Dataset.from_tensor_slices((x_train, y_train, a_train))
    train_data = train_data.batch(batch_size, drop_remainder=True)

    # NOTE(review): drop_remainder=True also discards the trailing partial batch
    # of the test split, so the metrics exclude those samples. Presumably needed
    # because the model is built for a fixed batch_size -- confirm.
    test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
    test_data = test_data.batch(batch_size, drop_remainder=True)

    # New optimizer and model for every seed.
    opt = Adam(learning_rate=lr)
    model = FairLogisticRegression(xdim, ydim, adim, batch_size, fairdef)
    # NOTE(review): raw_data is the full, unsplit dataset -- confirm the training
    # loop does not fit on rows that belong to the test split.
    zhang_train(model, raw_data, train_data, epochs, opt)

    Y, A, Y_hat, A_hat = fair_evaluation(model, test_data)
    # The confusion matrix is returned but not recorded in the results table.
    clas_acc, dp, deqodds, deqopp, confusion_matrix = compute_metrics(Y, A, Y_hat, A_hat, adim)

    fair_metrics = (dp, deqodds, deqopp)
    tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

    # Value order must match `header`.
    result = [model_name, cv_seed, clas_acc, *fair_metrics, *tradeoff]
    results.append(result)

    del opt

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.44645968079566956 | 0.24801447987556458 | 0.6990764170040485 | 0.04564144736842105
> 2 | 0.37540388107299805 | 0.21866849064826965 | 0.8256578947368421 | 0.160646508097166
> 3 | 0.3487129807472229 | 0.2001083493232727 | 0.8291371457489879 | 0.20960905870445345
> 4 | 0.3369394540786743 | 0.19515419006347656 | 0.8318256578947368 | 0.24493927125506074
> 5 | 0.3302115797996521 | 0.19348417222499847 | 0.8341346153846154 | 0.26515055668016196
> 6 | 0.3258592486381531 | 0.19263917207717896 | 0.8354314271255061 | 0.2765371963562753
> 7 | 0.32293620705604553 | 0.1921166479587555 | 0.8372975708502024 | 0.2830212550607287
> 8 | 0.3209080100059509 | 0.19173121452331543 | 0.8384362348178138 | 0.2881452429149798
> 9 | 0.319448322057724 | 0.1914454996585846 | 0.839132085020243 | 0.290043016194332
> 10 | 0.3183511197566986 | 0.1912785768508911 | 0.840080971659919 | 0.29102353238866396
> 11 | 0.3174861967563629 | 0.1912114918231964 | 0.84077

## Saving into DF then CSV

In [12]:
# Build the results table from the accumulated rows; column labels come from `header`.
result_df = pd.DataFrame(data=results, columns=list(header))
# Bare last expression so the notebook renders the table.
result_df

Unnamed: 0,model_name,cv_seed,clas_acc,dp,deqodds,deqopp,trade_dp,trade_deqodds,trade_deqopp
0,Zhang4DP,13,0.850118,0.167272,0.792174,0.643422,0.279541,0.820124,0.732467
1,Zhang4DP,29,0.848415,0.17822,0.794827,0.654669,0.294564,0.820747,0.739055
2,Zhang4DP,42,0.852858,0.181136,0.753862,0.580632,0.298809,0.80031,0.690897
3,Zhang4DP,55,0.851155,0.151314,0.714448,0.487607,0.256949,0.776833,0.62002
4,Zhang4DP,73,0.846786,0.160116,0.75437,0.570038,0.269309,0.797911,0.681383
5,Zhang4EqOdds,13,0.849378,0.159445,0.781752,0.616987,0.268489,0.814163,0.714767
6,Zhang4EqOdds,29,0.849156,0.17194,0.781615,0.62452,0.285975,0.813987,0.719717
7,Zhang4EqOdds,42,0.853007,0.174075,0.751493,0.570092,0.289144,0.799039,0.683427
8,Zhang4EqOdds,55,0.851229,0.144379,0.711039,0.474568,0.246883,0.774844,0.609393
9,Zhang4EqOdds,73,0.847675,0.150208,0.735393,0.527243,0.255196,0.787552,0.65012


In [13]:
# Persist the results table as <data_name>-result/zhang-<epochs>.csv.
out_path = Path(f'{data_name}-result') / f'zhang-{epochs}.csv'
# to_csv does not create missing directories, so make sure the target folder
# exists; otherwise the save fails on a fresh checkout or a new data_name.
out_path.parent.mkdir(parents=True, exist_ok=True)
result_df.to_csv(out_path)