# Testing file 
### where we evaluate Zhang's models using the test set

## Preliminaries

In [1]:
from math import sqrt, isnan
from pathlib import Path

import tensorflow as tf
from tensorflow.keras.optimizers import Adam, Adagrad
from tensorflow.data import Dataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import csv

from util import metrics
from util.load_data import load_data
from util.evaluation import *

from zhang.models import FairLogisticRegression
from zhang.learning import train_loop as zhang_train

In [2]:
# Training hyper-parameters shared by every run in this notebook.
lr = 0.001        # learning rate handed to Adam
epochs = 100      # epoch count passed to zhang_train
batch_size = 64   # used for the tf.data batches and for the model constructor

# Notebook-level optimizer. Each training loop further down builds its own
# Adam instance per repetition, so this object is rebound before it is used.
opt = Adam(learning_rate=lr)

In [3]:
# Number of independent train/evaluate repetitions performed for each model.
test_loop = 5

## Load data

In [4]:
# Training split of the 'adult' dataset, unpacked into its three arrays.
# presumably x = features, y = labels, a = sensitive attribute -- confirm in util.load_data
x_train, y_train, a_train = load_data('adult', 'train')

# The un-batched arrays are also handed to zhang_train as a single tuple.
raw_data = x_train, y_train, a_train

In [5]:
# Width (second axis) of each training array.
xdim, ydim, adim = (arr.shape[1] for arr in (x_train, y_train, a_train))

# Not referenced by any other cell shown in this notebook.
zdim = 8

In [6]:
# Batched training pipeline; drop_remainder=True keeps every batch at exactly
# batch_size rows by discarding a trailing partial batch.
train_data = (
    Dataset
    .from_tensor_slices((x_train, y_train, a_train))
    .batch(batch_size, drop_remainder=True)
)
train_data

<BatchDataset shapes: ((64, 113), (64, 1), (64, 1)), types: (tf.float64, tf.float64, tf.float64)>

In [7]:
# Validation split, batched the same way as the training data.
x_valid, y_valid, a_valid = load_data('adult', 'valid')

# NOTE(review): drop_remainder=True discards a trailing partial batch, so the
# predictions made on valid_data may cover fewer rows than y_valid / a_valid --
# confirm that compute_metrics accounts for this.
valid_data = (
    Dataset
    .from_tensor_slices((x_valid, y_valid, a_valid))
    .batch(batch_size, drop_remainder=True)
)

In [8]:
# Test split, batched the same way as the training data.
# NOTE(review): test_data is built here but none of the cells shown in this
# notebook read it; the evaluation loops below score valid_data instead.
x_test, y_test, a_test = load_data('adult', 'test')

test_data = (
    Dataset
    .from_tensor_slices((x_test, y_test, a_test))
    .batch(batch_size, drop_remainder=True)
)

## Result file

In [9]:
# Column names of the result table, in the order each result row is assembled.
header = (
    # run identification and headline scores
    "model_name", "clas_acc",
    # fairness metrics
    "dp", "deqodds", "deqopp",
    # accuracy/fairness trade-off, one per fairness metric
    "trade_dp", "trade_deqodds", "trade_deqopp",
    # confusion-matrix counts for the group a == 0
    "TN_a0", "FP_a0", "FN_a0", "TP_a0",
    # confusion-matrix counts for the group a == 1
    "TN_a1", "FP_a1", "FN_a1", "TP_a1",
)

# One row is appended per trained model by the testing loops.
results = list()

## Testing loop
#### Each model is evaluated 5 times
#### At the end of each iteration we save the result

### Zhang for DP

In [10]:
# Fairness definition passed to the model constructor for this section.
fairdef = 'DemPar'

for _run in range(test_loop):
    # A new optimizer and a new model are created for every repetition.
    opt = Adam(learning_rate=lr)
    model = FairLogisticRegression(xdim, batch_size, fairdef)
    zhang_train(model, raw_data, train_data, epochs, opt)

    # NOTE(review): the notebook title mentions the test set, but the model is
    # scored on valid_data / y_valid / a_valid here -- confirm which is intended.
    Y_hat, A_hat = evaluation(model, valid_data)
    (clas_acc, dp, deqodds, deqopp,
     confusion_matrix, metrics_a0, metrics_a1) = compute_metrics(y_valid, Y_hat, a_valid, A_hat)

    # One trade-off value per fairness metric, in the order dp, deqodds, deqopp.
    fair_metrics = (dp, deqodds, deqopp)
    tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

    # Row layout follows `header`: name, scores, trade-offs, then per-group counts.
    result = ['Zhang4DP', clas_acc, dp, deqodds, deqopp, *tradeoff] + metrics_a0 + metrics_a1
    results.append(result)

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.41985824704170227 | 0.7958990931510925 | 0.7547662466843501 | 0.3273375331564987
> 2 | 1.0335720777511597 | 0.6609182357788086 | 0.8154011936339522 | 0.39750497347480107
> 3 | 1.0165108442306519 | 0.6007074117660522 | 0.8217009283819628 | 0.6755636604774535
> 4 | 0.38078200817108154 | 0.5728242993354797 | 0.8353365384615384 | 0.7239721485411141
> 5 | 0.3800588548183441 | 0.5540517568588257 | 0.8364555702917772 | 0.7032493368700266
> 6 | 0.379718154668808 | 0.543022871017456 | 0.8381133952254642 | 0.6859250663129973
> 7 | 0.3760422170162201 | 0.5348333120346069 | 0.8409731432360743 | 0.681117374005305
> 8 | 0.3707612156867981 | 0.5279960632324219 | 0.8414704907161804 | 0.6799568965517242
> 9 | 0.3673875331878662 | 0.5226851105690002 | 0.840434350132626 | 0.6809101458885942
> 10 | 0.3542713224887848 | 0.5111318826675415 | 0.8399784482758621 | 0.6824850795755968
> 11 | 0.36238372325897217 | 0.5156087279319763 | 0.84540782493368

### Zhang for Eq Odds

In [11]:
# Fairness definition passed to the model constructor for this section.
fairdef = 'EqOdds'

for _run in range(test_loop):
    # A new optimizer and a new model are created for every repetition.
    opt = Adam(learning_rate=lr)
    model = FairLogisticRegression(xdim, batch_size, fairdef)
    zhang_train(model, raw_data, train_data, epochs, opt)

    # Predictions are produced on valid_data and compared with y_valid / a_valid.
    Y_hat, A_hat = evaluation(model, valid_data)
    (clas_acc, dp, deqodds, deqopp,
     confusion_matrix, metrics_a0, metrics_a1) = compute_metrics(y_valid, Y_hat, a_valid, A_hat)

    # One trade-off value per fairness metric, in the order dp, deqodds, deqopp.
    fair_metrics = (dp, deqodds, deqopp)
    tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

    # Row layout follows `header`: name, scores, trade-offs, then per-group counts.
    result = ['Zhang4EqOdds', clas_acc, dp, deqodds, deqopp, *tradeoff] + metrics_a0 + metrics_a1
    results.append(result)

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.8880362510681152 | 0.7379366159439087 | 0.7536057692307692 | 0.3326840185676393
> 2 | 1.0340898036956787 | 0.5918917655944824 | 0.8192556366047745 | 0.552801724137931
> 3 | 1.0256679058074951 | 0.5437179803848267 | 0.8197944297082228 | 0.7243451591511936
> 4 | 0.7730251550674438 | 0.5275682806968689 | 0.8220739389920424 | 0.7345407824933687
> 5 | 0.3722943663597107 | 0.49669283628463745 | 0.8312748673740054 | 0.7422911140583555
> 6 | 0.8259875774383545 | 0.5185039043426514 | 0.8268401856763926 | 0.7451923076923077
> 7 | 0.38068997859954834 | 0.491730272769928 | 0.8404757957559682 | 0.7443633952254642
> 8 | 1.0263278484344482 | 0.5150231122970581 | 0.8297413793103449 | 0.7441147214854111
> 9 | 0.3776051700115204 | 0.4860036373138428 | 0.8351293103448276 | 0.7446949602122016
> 10 | 0.3755377531051636 | 0.48168760538101196 | 0.8407244694960212 | 0.7461870026525199
> 11 | 0.37114769220352173 | 0.4665905833244324 | 0.837657493368

### Zhang for Eq Opp

In [12]:
# Fairness definition passed to the model constructor for this section.
fairdef = 'EqOpp'

for _run in range(test_loop):
    # A new optimizer and a new model are created for every repetition.
    opt = Adam(learning_rate=lr)
    model = FairLogisticRegression(xdim, batch_size, fairdef)
    zhang_train(model, raw_data, train_data, epochs, opt)

    # Predictions are produced on valid_data and compared with y_valid / a_valid.
    Y_hat, A_hat = evaluation(model, valid_data)
    (clas_acc, dp, deqodds, deqopp,
     confusion_matrix, metrics_a0, metrics_a1) = compute_metrics(y_valid, Y_hat, a_valid, A_hat)

    # One trade-off value per fairness metric, in the order dp, deqodds, deqopp.
    fair_metrics = (dp, deqodds, deqopp)
    tradeoff = [compute_tradeoff(clas_acc, fair_metric) for fair_metric in fair_metrics]

    # Row layout follows `header`: name, scores, trade-offs, then per-group counts.
    result = ['Zhang4EqOpp', clas_acc, dp, deqodds, deqopp, *tradeoff] + metrics_a0 + metrics_a1
    results.append(result)

> Epoch | Class Loss | Adv Loss | Class Acc | Adv Acc
> 1 | 0.4192975163459778 | 0.2229214310646057 | 0.7571286472148542 | 0.32949270557029176
> 2 | 0.7964738011360168 | 0.1485237181186676 | 0.8156498673740054 | 0.4253564323607427
> 3 | 0.3927888870239258 | 0.13469451665878296 | 0.8318551061007957 | 0.548988726790451
> 4 | 0.3991836905479431 | 0.12518563866615295 | 0.8296584880636605 | 0.6181614721485411
> 5 | 0.3763328790664673 | 0.10475597530603409 | 0.8423822944297082 | 0.661223474801061
> 6 | 0.3863135874271393 | 0.11473175883293152 | 0.8447861405835544 | 0.6779675066312998
> 7 | 1.0237442255020142 | 0.09862307459115982 | 0.8396883289124668 | 0.683479774535809
> 8 | 1.0187339782714844 | 0.09844865649938583 | 0.8311505305039788 | 0.6864638594164456
> 9 | 0.41885995864868164 | 0.09838911890983582 | 0.8317307692307692 | 0.6864224137931034
> 10 | 0.34886518120765686 | 0.09960773587226868 | 0.8454907161803713 | 0.6851790450928382
> 11 | 0.3664979338645935 | 0.09839144349098206 | 0.84404

## Saving into DF then CSV

In [13]:
# Collect every accumulated result row into one table, labelled with `header`.
result_df = pd.DataFrame(data=results, columns=header)
result_df

Unnamed: 0,model_name,clas_acc,dp,deqodds,deqopp,trade_dp,trade_deqodds,trade_deqopp,TN_a0,FP_a0,FN_a0,TP_a0,TN_a1,FP_a1,FN_a1,TP_a1
0,Zhang4DP,0.831449,0.808436,0.919858,0.938459,0.819781,0.873422,0.881719,1610.0,57.0,93.0,119.0,2472.0,379.0,485.0,801.0
1,Zhang4DP,0.831449,0.808436,0.919858,0.938459,0.819781,0.873422,0.881719,1610.0,57.0,93.0,119.0,2472.0,379.0,485.0,801.0
2,Zhang4DP,0.831449,0.808436,0.919858,0.938459,0.819781,0.873422,0.881719,1610.0,57.0,93.0,119.0,2472.0,379.0,485.0,801.0
3,Zhang4DP,0.831449,0.808436,0.919858,0.938459,0.819781,0.873422,0.881719,1610.0,57.0,93.0,119.0,2472.0,379.0,485.0,801.0
4,Zhang4DP,0.831449,0.808436,0.919858,0.938459,0.819781,0.873422,0.881719,1610.0,57.0,93.0,119.0,2472.0,379.0,485.0,801.0
5,Zhang4EqOdds,0.828125,0.799634,0.901084,0.907824,0.81363,0.863065,0.866145,1611.0,56.0,99.0,113.0,2454.0,397.0,482.0,804.0
6,Zhang4EqOdds,0.828125,0.799634,0.901084,0.907824,0.81363,0.863065,0.866145,1611.0,56.0,99.0,113.0,2454.0,397.0,482.0,804.0
7,Zhang4EqOdds,0.828125,0.799634,0.901084,0.907824,0.81363,0.863065,0.866145,1611.0,56.0,99.0,113.0,2454.0,397.0,482.0,804.0
8,Zhang4EqOdds,0.828125,0.799634,0.901084,0.907824,0.81363,0.863065,0.866145,1611.0,56.0,99.0,113.0,2454.0,397.0,482.0,804.0
9,Zhang4EqOdds,0.828125,0.799634,0.901084,0.907824,0.81363,0.863065,0.866145,1611.0,56.0,99.0,113.0,2454.0,397.0,482.0,804.0


In [14]:
# Persist the result table. The output directory is created first because
# DataFrame.to_csv does not create missing parent directories and would raise
# OSError after all training runs have already completed.
results_path = Path('results') / 'validation_zhang-100.csv'
results_path.parent.mkdir(parents=True, exist_ok=True)

# The index is written as the first CSV column (pandas default), as before.
result_df.to_csv(results_path)