## Import libs

In [1]:
from math import sqrt, isnan
from pathlib import Path

import tensorflow as tf
from tensorflow.keras.optimizers import Adam, Adagrad
from tensorflow.data import Dataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from util import metrics
from util.load_data import load_data
from util.evaluation import *

from fairgan.models import *
from fairgan.learning import *

## Preliminaries

In [2]:
# Fix the random seeds first so that TensorFlow / NumPy random draws
# (e.g. the sampled generator noise) are repeatable across "Restart & Run All".
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Hyperparameters shared by the training cells below.
batch_size = 64
epochs = 10
learning_rate = 0.001
# Optimizer handed to the autoencoder pre-training loop.
ae_opt = Adam(learning_rate=learning_rate)

In [3]:
# Three separate Adam instances (generator / discriminator / second
# discriminator, judging by the names), all created with the same step size.
learning_rate = 0.001
gen_opt, disc_opt, d2_opt = (Adam(learning_rate=learning_rate) for _ in range(3))

## Load data

In [4]:
# Load the 'adult' training split once; `raw_data` keeps the untouched
# (x, y, a) triple, which is what the training loops below receive.
raw_data = tuple(load_data('adult', 'train'))
x_train, y_train, a_train = raw_data

In [5]:
# Peek at the last feature column of x_train; the next cell strips this column
# as the protected attribute.
x_train[:,-1]

array([0., 0., 0., ..., 1., 0., 0.])

In [6]:
# For the FairGAN model the feature matrix X must not include the protected
# attribute, so its last column is dropped here.
# Slice from the untouched array kept in `raw_data` (not from x_train itself)
# so that re-running this cell cannot strip a second column.
x_train = raw_data[0][:, :-1]

In [7]:
#a_train.reshape(24128,)

In [8]:
#any(x_train[:,-1] - a_train.reshape(24128,))

In [9]:
# Column counts of the three training arrays; the noise vector fed to the
# generator is as wide as X and Y together.
xdim, ydim, adim = (arr.shape[1] for arr in (x_train, y_train, a_train))
noise_dim = xdim + ydim

In [10]:
# Number of feature columns left in x_train after the protected attribute was removed.
xdim

111

In [11]:
# Sanity check: first training example of x_train.
x_train[0]

array([0.        , 0.        , 0.        , 1.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 1.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 1.        , 0.        ,
       0.        , 0.        , 1.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 1.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 0.37901745, 0.39795918,
       0.        , 0.        , 0.        , 0.        , 0.     

In [12]:
# Sanity check: label of the first training example.
y_train[0]

array([0.])

In [13]:
# Sanity check: protected attribute of the first training example.
a_train[0]

array([0.])

In [14]:
# Wrap the training triple in a tf.data pipeline of fixed-size batches;
# a trailing partial batch is discarded (drop_remainder=True).
train_data = (
    Dataset
    .from_tensor_slices((x_train, y_train, a_train))
    .batch(batch_size, drop_remainder=True)
)
train_data

<BatchDataset shapes: ((64, 111), (64, 1), (64, 1)), types: (tf.float64, tf.float64, tf.float64)>

In [15]:
# Validation split, batched the same way as the training set.
# NOTE(review): x_valid is used exactly as loaded, whereas x_train had its last
# column removed earlier — confirm the models evaluated on it expect that width.
x_valid, y_valid, a_valid = load_data('adult', 'valid')

valid_data = (
    Dataset
    .from_tensor_slices((x_valid, y_valid, a_valid))
    .batch(batch_size, drop_remainder=True)
)

In [16]:
# Test split, batched the same way as the training set.
# NOTE(review): x_test is used exactly as loaded, whereas x_train had its last
# column removed earlier — confirm the models evaluated on it expect that width.
x_test, y_test, a_test = load_data('adult', 'test')

test_data = (
    Dataset
    .from_tensor_slices((x_test, y_test, a_test))
    .batch(batch_size, drop_remainder=True)
)

## Testing if models are being trained

### For the Autoencoder

In [None]:
# Smoke test: build the autoencoder and pre-train it for two epochs only
# (not the full `epochs` setting) using the dedicated ae_opt optimizer.
ae = Autoencoder(xdim, ydim, adim, batch_size)
pre_train_loop(ae, raw_data, train_data, epochs=2, opt=ae_opt)

### FairGAN

In [None]:
### V1
# Build FairGAN around the decoder of the autoencoder pre-trained above (ae.dec).
fairgan = FairGAN(xdim, ydim, adim, ae.dec, batch_size)

# Smoke test of phase 'v1' for two epochs. The discriminator optimizer created
# in the preliminaries is named `disc_opt`; no `d1_opt` variable exists in this
# notebook, so `disc_opt` is what gets passed as the d1_opt argument.
train_loop(fairgan, raw_data, train_data, batch_size, noise_dim, epochs=2, phase='v1', gen_opt=gen_opt, d1_opt=disc_opt)

In [None]:
#fairgan.gdec.variables

In [None]:
### V2
# Continues training the FairGAN instance built in the V1 cell; uncomment the
# next line to start phase 'v2' from a fresh model instead.
#fairgan = FairGAN(xdim, ydim, adim, ae.dec, batch_size)

# The discriminator optimizer created in the preliminaries is named `disc_opt`;
# no `d1_opt` variable exists in this notebook, so `disc_opt` is passed as d1_opt.
train_loop(fairgan, raw_data, train_data, batch_size, noise_dim, epochs=2, phase='v2', gen_opt=gen_opt, d1_opt=disc_opt, d2_opt=d2_opt)

In [None]:
#tf.random.normal([batch_size, noise_dim])

In [None]:
# Protected-attribute column fed to the generator: zeros for the first half of
# the batch, ones for the rest. It is the same for every batch, so build it once.
# Using `batch_size - half_batch` keeps it exactly batch_size rows long even
# when batch_size is odd, matching the noise tensor below.
half_batch = batch_size // 2
gen_As = tf.concat(
    [tf.zeros([half_batch, 1]), tf.ones([batch_size - half_batch, 1])],
    axis=0,
)

# Draw one synthetic batch per real training batch, so the generated set ends up
# with as many batches as train_data. The real batch contents are not used.
gdec_data = []
gen_data = []
for _ in train_data:
    noise = tf.random.normal([batch_size, noise_dim])
    batch_gdec_data, batch_gen_data = fairgan.gdec(noise=noise, A=gen_As, batch_size=batch_size)
    gdec_data.append(batch_gdec_data)
    gen_data.append(batch_gen_data)

In [None]:
# Pack the per-batch outputs into a single tensor and look at one generated row.
# NOTE(review): despite its name, tensor_gen_data is built from gdec_data (the
# first value returned by fairgan.gdec), not from gen_data — confirm this is intended.
tensor_gen_data = tf.convert_to_tensor(gdec_data)
tensor_gen_data[0][4]
#tensor_gen_data

In [None]:
# Same row (batch 0, item 4) taken from the second value returned by fairgan.gdec, for comparison.
gen_data[0][4]

In [None]:
# Dump the variables of fairgan.gen for manual inspection.
# NOTE(review): this prints every variable in full; consider summarising before sharing.
fairgan.gen.variables

In [None]:
# Split the generated tensor along its last axis: all but the final two entries
# go to x_gen, the second-to-last to y_gen and the last to a_gen.
# NOTE(review): assumes a rank-3 tensor whose last axis is laid out as
# [X..., Y, A] with a single column each for Y and A — confirm against fairgan.gdec.
x_gen = tensor_gen_data[..., :-2]
y_gen = tensor_gen_data[..., -2]
a_gen = tensor_gen_data[..., -1]

In [None]:
#x_gen

In [None]:
# Read the batch count from tensor_gen_data rather than from x_gen: x_gen is
# reassigned by the reshape in the following cell, so its leading dimension
# would give a different (wrong) answer if this cell were run again afterwards.
num_batches = tensor_gen_data.shape[0]
# Total number of generated rows once the batch axis is flattened away.
new_shape = num_batches * batch_size

In [None]:
# Reshape each generated piece to (new_shape, width) so every sample is one row.
x_gen, y_gen, a_gen = (
    tf.reshape(part, shape=(new_shape, width))
    for part, width in ((x_gen, xdim), (y_gen, ydim), (a_gen, adim))
)

In [None]:
# Inspect the generated protected-attribute column after flattening.
a_gen

In [None]:
# Undo the batching: materialise every (x, y, a) example of train_data as NumPy
# values, then separate the three fields into parallel lists.
unbatched_train = list(train_data.unbatch().as_numpy_iterator())

x = [example[0] for example in unbatched_train]
y = [example[1] for example in unbatched_train]
a = [example[2] for example in unbatched_train]


In [None]:
# Turn the three per-example lists into tensors for the distance metrics.
x, y, a = (tf.convert_to_tensor(column) for column in (x, y, a))

In [None]:
# Inspect row 4 of the generated (X, Y) matrix.
tf.concat((x_gen, y_gen),1)[4]

In [None]:
# Euclidean distance between real and generated samples, computed three ways:
# on (X, Y), on (X, Y, A), and with conditional=True where A is passed alongside (X, Y).
real_xy = tf.concat((x, y), 1)
fake_xy = tf.concat((x_gen, y_gen), 1)

xy = metrics.euclidian_dist(real_xy, fake_xy)
print(xy)

real_xya = tf.concat((x, y, a), 1)
fake_xya = tf.concat((x_gen, y_gen, a_gen), 1)
xya = metrics.euclidian_dist(real_xya, fake_xya)
print(xya)

cond_a0, cond_a1 = metrics.euclidian_dist((real_xy, a), (fake_xy, a_gen), conditional=True)
print(cond_a0, cond_a1)

In [None]:
def euclidian_dist(real_data, gen_data, conditional=False):
    """Return the Euclidean norm of the difference between two tensors.

    Both inputs are cast to double precision. If they differ in length along
    axis 0, the longer one is truncated to the length of the shorter before
    the difference is taken.

    Args:
        real_data: tensor-like of real samples.
        gen_data: tensor-like of generated samples.
        conditional: accepted for signature compatibility but currently
            ignored; the per-group (conditional) variant is disabled here.

    Returns:
        The result of ``tf.norm(real - gen, ord='euclidean')``.
    """
    real = tf.dtypes.cast(real_data, tf.double)
    fake = tf.dtypes.cast(gen_data, tf.double)

    # Compare only the rows both tensors have.
    common_rows = min(real.shape[0], fake.shape[0])
    difference = real[:common_rows] - fake[:common_rows]

    return tf.norm(difference, ord='euclidean')

In [None]:
# Recompute the (X, Y) distance with the euclidian_dist defined in this notebook
# (overwrites `xy` from the metrics.euclidian_dist call) to compare the two results.
xy = euclidian_dist(tf.concat((x, y), 1), tf.concat((x_gen, y_gen), 1))
print(xy)

In [None]:
# NOTE(review): `stop` is not assigned anywhere in the visible notebook, so
# evaluating it presumably raises NameError and halts "Run All" at this cell —
# likely a deliberate sentinel; confirm, and remove once the cells below are revived.
stop
# NOTE(review): `model` is only assigned in a later cell of the visible notebook,
# so on a fresh kernel the next line has nothing to validate — confirm intended order.
Y_hat, A_hat = validation(model, valid_data)
ret = compute_metrics(y_valid, Y_hat, a_valid, A_hat)

In [None]:
# Annotated heatmap of the fifth metric returned by compute_metrics
# (presumably a confusion matrix — confirm against util.evaluation).
sns.heatmap(ret[4], annot=True, fmt='g')

### For EqOdds

In [None]:
# Quick check: train a FairLogisticRegression with the 'EqOdds' fairness definition.
# NOTE(review): train_loop is called here with four positional arguments, unlike
# the FairGAN calls earlier in the notebook — confirm which train_loop is in scope
# and what the trailing 1 stands for.
fairdef='EqOdds'
model = FairLogisticRegression(xdim, batch_size, fairdef)
ret = train_loop(model, raw_data, train_data, 1)

In [None]:
# Run the current `model` on the validation batches and score its predictions;
# `ret` is overwritten with the compute_metrics result.
Y_hat, A_hat = validation(model, valid_data)
ret = compute_metrics(y_valid, Y_hat, a_valid, A_hat)

In [None]:
# Annotated heatmap of the fifth metric for the 'EqOdds' quick run
# (presumably a confusion matrix — confirm against util.evaluation).
sns.heatmap(ret[4], annot=True, fmt='g')

### For EqOpp

In [None]:
# Quick check: train a FairLogisticRegression with the 'EqOpp' fairness definition.
# NOTE(review): train_loop is called here with four positional arguments, unlike
# the FairGAN calls earlier in the notebook — confirm which train_loop is in scope
# and what the trailing 1 stands for.
fairdef='EqOpp'
model = FairLogisticRegression(xdim, batch_size, fairdef)
ret = train_loop(model, raw_data, train_data, 1)

In [None]:
# Run the current `model` on the validation batches and score its predictions;
# `ret` is overwritten with the compute_metrics result.
Y_hat, A_hat = validation(model, valid_data)
ret = compute_metrics(y_valid, Y_hat, a_valid, A_hat)

In [None]:
# Annotated heatmap of the fifth metric for the 'EqOpp' quick run
# (presumably a confusion matrix — confirm against util.evaluation).
sns.heatmap(ret[4], annot=True, fmt='g')

## Models

In [None]:
# Shared light-to-seagreen colormap used by the heatmaps in this section.
cmap = sns.light_palette("seagreen", as_cmap=True)

In [None]:
# 'DemPar' run: train for the full `epochs` setting, then evaluate on the
# validation set. The metrics are kept in ret4dp for the comparison plots below.
fairdef = 'DemPar'
model = FairLogisticRegression(xdim, batch_size, fairdef)
train_loop(model, raw_data, train_data, epochs)
print('Validation')
Y_hat, A_hat = validation(model, valid_data)
ret4dp = compute_metrics(y_valid, Y_hat, a_valid, A_hat)

In [None]:
# Annotated heatmap of the fifth 'DemPar' metric
# (presumably a confusion matrix — confirm against util.evaluation).
sns.heatmap(ret4dp[4], cmap=cmap, annot=True, fmt='g')

In [None]:
# 'EqOdds' run: train for the full `epochs` setting, then evaluate on the
# validation set. The metrics are kept in ret4eqodds for the comparison plots below.
fairdef = 'EqOdds'
model = FairLogisticRegression(xdim, batch_size, fairdef)
train_loop(model, raw_data, train_data, epochs)
print('Validation')
Y_hat, A_hat = validation(model, valid_data)
ret4eqodds = compute_metrics(y_valid, Y_hat, a_valid, A_hat)

In [None]:
# Annotated heatmap of the fifth 'EqOdds' metric
# (presumably a confusion matrix — confirm against util.evaluation).
sns.heatmap(ret4eqodds[4], cmap=cmap, annot=True, fmt='g')

In [None]:
# 'EqOpp' run: train for the full `epochs` setting, then evaluate on the
# validation set. The metrics are kept in ret4eqopp for the comparison plots below.
fairdef = 'EqOpp'
model = FairLogisticRegression(xdim, batch_size, fairdef)
train_loop(model, raw_data, train_data, epochs)
print('Validation')
Y_hat, A_hat = validation(model, valid_data)
ret4eqopp = compute_metrics(y_valid, Y_hat, a_valid, A_hat)

In [None]:
# Annotated heatmap of the fifth 'EqOpp' metric
# (presumably a confusion matrix — confirm against util.evaluation).
sns.heatmap(ret4eqopp[4], cmap=cmap, annot=True, fmt='g')

In [None]:
# Accuracy reached under each fairness definition, one point per trained model.
# Element 0 of each metrics result is the value shown on the 'Accuracy' axis.
# (The previous placeholder `y=[2,4,6]` was removed: it was never plotted and it
# overwrote the real-label tensor `y` used by the distance cells above.)
sns.set_style('darkgrid')
plt.ylabel('Accuracy')

plt.scatter(1, ret4dp[0], label="ZHANG DemPar")
plt.scatter(2, ret4eqodds[0], label="ZHANG EqOdds")
plt.scatter(3, ret4eqopp[0], label="ZHANG EqOpp")

# The x positions are only slots for the three models, so label them by name
# instead of leaving bare 1 / 2 / 3 ticks.
plt.xticks([1, 2, 3], ['DemPar', 'EqOdds', 'EqOpp'])
plt.legend()

plt.show()

In [None]:
# Accuracy (metric 0) against the demographic-parity gap (metric 1), one point per model.
plt.plot()
plt.ylabel('Accuracy')

for scores, tag in (
    (ret4dp, "ZHANG DemPar"),
    (ret4eqodds, "ZHANG EqOdds"),
    (ret4eqopp, "ZHANG EqOpp"),
):
    plt.scatter(scores[1], scores[0], label=tag)

plt.xlabel('\u0394DP')
plt.legend()
plt.show()

In [None]:
# Accuracy (metric 0) against the equalized-odds gap (metric 2), one point per model.
plt.plot()
plt.ylabel('Accuracy')

for scores, tag in (
    (ret4dp, "ZHANG DemPar"),
    (ret4eqodds, "ZHANG EqOdds"),
    (ret4eqopp, "ZHANG EqOpp"),
):
    plt.scatter(scores[2], scores[0], label=tag)

plt.xlabel('\u0394EqOdds')
plt.legend()
plt.show()

In [None]:
# Accuracy (metric 0) against the equal-opportunity gap (metric 3), one point per model.
plt.plot()
plt.ylabel('Accuracy')

for scores, tag in (
    (ret4dp, "ZHANG DemPar"),
    (ret4eqodds, "ZHANG EqOdds"),
    (ret4eqopp, "ZHANG EqOpp"),
):
    plt.scatter(scores[3], scores[0], label=tag)

plt.xlabel('\u0394EqOpp')
plt.legend()
plt.show()