## Import libs

In [1]:
from math import sqrt, isnan
from pathlib import Path

import tensorflow as tf
from tensorflow.keras.optimizers import Adam, Adagrad
from tensorflow.data import Dataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from util import metrics
from util.load_data import load_data
from util.evaluation import *

In [2]:
from experiment.ae_learning import *
from experiment.ae_model import Autoencoder2
from experiment.models import Generator2
from experiment.models import Discriminator2
from experiment.learning import *

## Preliminaries

In [3]:
# Global experiment configuration.
batch_size = 64
epochs = 10

# Fix the NumPy and TensorFlow global seeds so that the stochastic steps of this
# notebook (e.g. the tf.random.normal noise drawn when sampling from the
# generator) give the same results after "Restart Kernel -> Run All".
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [4]:
# Optimizer used for the autoencoder pre-training.
# The rate has its own name so that it cannot be confused with (or silently
# overwrite) `learning_rate`, which the GAN-optimizer cell defines with a
# different value.
ae_learning_rate = 0.001
ae_opt = Adam(learning_rate=ae_learning_rate)

In [5]:
# Optimizers for the adversarial phase. The generator and the discriminator use
# the same Adam settings; d2_opt is built without beta_1, so it keeps Adam's
# default for that parameter.
learning_rate = 1e-4
beta_1 = 0.5

gen_opt = Adam(beta_1=beta_1, learning_rate=learning_rate)
disc_opt = Adam(beta_1=beta_1, learning_rate=learning_rate)
d2_opt = Adam(learning_rate=learning_rate)

## Load data

In [6]:
# Load the 'train' split of the 'adult' dataset. raw_data keeps the (x, y, a)
# triple exactly as loaded; the individual arrays are unpacked from it.
raw_data = tuple(load_data('adult', 'train'))
x_train, y_train, a_train = raw_data

In [7]:
x_train[:,-1]

array([0., 0., 0., ..., 1., 0., 0.])

In [8]:
# For the FairGAN model the feature matrix X must not include the protected
# attribute, so the last column is dropped.
# The slice is taken from the untouched array kept in raw_data (not from x_train
# itself) so that this cell is idempotent: re-running it can never strip a
# second column.
x_train = raw_data[0][:, :-1]

In [9]:
#a_train.reshape(24128,)

In [10]:
#any(x_train[:,-1] - a_train.reshape(24128,))

In [11]:
# Column counts of the three training arrays, and the width of the noise vector
# fed to the generator (twice the combined x/y width).
xdim, ydim, adim = (arr.shape[1] for arr in (x_train, y_train, a_train))
noise_dim = 2 * (xdim + ydim)

In [12]:
xdim

111

In [13]:
x_train[0]

array([0.        , 0.        , 0.        , 1.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 1.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 1.        , 0.        ,
       0.        , 0.        , 1.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 1.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 0.37901745, 0.39795918,
       0.        , 0.        , 0.        , 0.        , 0.     

In [14]:
y_train[0]

array([0.])

In [15]:
a_train[0]

array([0.])

In [16]:
# Training set as fixed-size batches of (x, y, a); a final partial batch is
# dropped so every batch has exactly batch_size rows.
train_data = (
    Dataset.from_tensor_slices((x_train, y_train, a_train))
    .batch(batch_size, drop_remainder=True)
)
train_data

<BatchDataset shapes: ((64, 111), (64, 1), (64, 1)), types: (tf.float64, tf.float64, tf.float64)>

In [17]:
# Validation split, prepared the same way as the training split: drop the last
# column of X, then batch with the partial final batch discarded.
x_valid, y_valid, a_valid = load_data('adult', 'valid')
x_valid = x_valid[:, :-1]

valid_data = (
    Dataset.from_tensor_slices((x_valid, y_valid, a_valid))
    .batch(batch_size, drop_remainder=True)
)

In [18]:
# Test split. As for the train and valid splits, the last column of X (the
# protected attribute) is dropped so that x_test has the same width (xdim) the
# models are built with.
x_test, y_test, a_test = load_data('adult', 'test')
x_test = x_test[:, :-1]

test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
test_data = test_data.batch(batch_size, drop_remainder=True)

In [19]:
# Show the noise, feature and label widths, one per line
print(noise_dim, xdim, ydim, sep='\n')

224
111
1


# Here I am running tests to see whether I can build a FairGAN incrementally, piece by piece

In [20]:
# Build the autoencoder and pre-train it for 200 epochs with ae_opt.
# NOTE(review): raw_data still holds the x array as loaded, i.e. before the last
# column was stripped from x_train, while the model is built with the stripped
# width xdim — confirm that pre_train_loop expects this.
ae = Autoencoder2(xdim, ydim, adim, batch_size)
pre_train_loop(ae, raw_data, train_data, epochs=200, opt=ae_opt)

> Epoch | Model Loss
> 1 | 339.7480163574219
> 2 | 338.08074951171875
> 3 | 336.673583984375
> 4 | 335.3675537109375
> 5 | 286.6271667480469
> 6 | 252.3969268798828
> 7 | 225.0007781982422
> 8 | 206.53741455078125
> 9 | 185.6261444091797
> 10 | 160.08477783203125
> 11 | 143.46566772460938
> 12 | 128.7942657470703
> 13 | 115.33477020263672
> 14 | 108.74057006835938
> 15 | 101.9078140258789
> 16 | 93.48155212402344
> 17 | 82.00712585449219
> 18 | 71.96432495117188
> 19 | 64.35279846191406
> 20 | 58.251922607421875
> 21 | 52.314388275146484
> 22 | 46.67599105834961
> 23 | 41.29800796508789
> 24 | 36.02969741821289
> 25 | 32.04430389404297
> 26 | 29.081317901611328
> 27 | 26.90154457092285
> 28 | 25.029611587524414
> 29 | 23.050256729125977
> 30 | 21.139705657958984
> 31 | 18.915945053100586
> 32 | 16.61565589904785
> 33 | 15.573357582092285
> 34 | 14.839929580688477
> 35 | 13.999384880065918
> 36 | 13.433416366577148
> 37 | 12.986323356628418
> 38 | 12.83680248260498
> 39 | 12.17980861663

In [21]:
# Build the GAN. The generator is handed the decoder of the autoencoder that
# was pre-trained in the previous cell.
gen = Generator2(xdim, ydim, adim, noise_dim, dec=ae.dec)
disc = Discriminator2(xdim, ydim, adim)

# Train for the number of epochs configured in the Preliminaries cell instead
# of repeating the literal here.
# NOTE(review): a d2_opt optimizer is created in the optimizer cell, but None is
# passed below — confirm this is intentional.
train_loop(
    gen, disc, train_data, batch_size, noise_dim,
    epochs=epochs,
    gen_opt=gen_opt, disc_opt=disc_opt, d2_opt=None,
)

> Epoch | G Loss | Disc Loss
> 1 | 0.700960099697113 | 1.3788366317749023
> 2 | 0.7645028829574585 | 1.3401390314102173
> 3 | 0.7841710448265076 | 1.3266459703445435
> 4 | 0.6725199818611145 | 1.4012651443481445
> 5 | 0.9940604567527771 | 1.1955337524414062
> 6 | 0.46070435643196106 | 1.5458956956863403
> 7 | 0.3109281063079834 | 1.6304078102111816
> 8 | 0.3227590024471283 | 1.6053563356399536
> 9 | 0.3383178412914276 | 1.5873898267745972
> 10 | 0.3498944640159607 | 1.5707305669784546


In [22]:
# Produce one generated batch per real training batch: fresh Gaussian noise plus
# that batch's A values are passed to the generator. Only A is used from the
# real batch.
gen_data = []

for X, Y, A in train_data:
    noise = tf.random.normal(shape=[batch_size, noise_dim])
    gen_data.append(gen(noise, A, batch_size))

In [23]:
gen_data

[<tf.Tensor: shape=(64, 113), dtype=float32, numpy=
 array([[9.43852775e-03, 9.99429047e-01, 1.36207682e-05, ...,
         6.08017248e-18, 1.80283670e-11, 0.00000000e+00],
        [2.34351261e-03, 9.99787152e-01, 3.41070445e-05, ...,
         3.70969726e-17, 3.35293841e-11, 0.00000000e+00],
        [1.12576992e-03, 9.99909163e-01, 7.17231478e-06, ...,
         1.01322346e-17, 2.71866071e-11, 0.00000000e+00],
        ...,
        [2.70348927e-03, 9.99638319e-01, 6.64219851e-06, ...,
         4.03543622e-16, 1.97069982e-11, 0.00000000e+00],
        [5.30083897e-03, 9.99611318e-01, 4.61123955e-06, ...,
         5.54178883e-17, 6.08286199e-11, 0.00000000e+00],
        [5.28255012e-04, 9.99871016e-01, 2.57642514e-05, ...,
         1.73757374e-16, 5.29688134e-11, 0.00000000e+00]], dtype=float32)>,
 <tf.Tensor: shape=(64, 113), dtype=float32, numpy=
 array([[9.4254203e-03, 9.9942976e-01, 1.3597290e-05, ..., 6.0799177e-18,
         1.8041679e-11, 0.0000000e+00],
        [2.3515220e-03, 9.99786

In [24]:
gen_data[0][0]

<tf.Tensor: shape=(113,), dtype=float32, numpy=
array([9.4385277e-03, 9.9942905e-01, 1.3620768e-05, 1.9777725e-10,
       3.1901607e-07, 1.1069413e-15, 9.5667424e-12, 2.9922459e-10,
       6.7507796e-12, 1.1665043e-10, 5.9485945e-12, 2.1171999e-07,
       2.3139336e-08, 3.5919837e-07, 6.5397821e-06, 5.6964617e-15,
       2.1497937e-10, 4.0858528e-10, 6.9711069e-12, 2.0043856e-18,
       7.5409590e-19, 8.5866989e-24, 9.9999917e-01, 2.2325674e-11,
       3.5465768e-05, 5.8796043e-20, 8.1869391e-13, 9.9980348e-01,
       1.3574356e-07, 3.6848388e-07, 9.9715471e-01, 1.5456652e-13,
       9.8448913e-23, 1.7447046e-04, 4.1820083e-08, 9.3591443e-06,
       5.8946245e-07, 6.2960509e-10, 4.1652220e-06, 9.9999940e-01,
       2.3067859e-08, 1.3836100e-02, 6.5692980e-04, 1.0818171e-07,
       9.9998569e-01, 6.2218046e-01, 3.5790779e-10, 7.1279198e-04,
       9.9998128e-01, 6.5798544e-05, 1.2529560e-12, 8.5486999e-29,
       3.8783424e-11, 2.4013229e-06, 6.4766525e-12, 8.7360830e-14,
       1.71804

In [25]:
# Stack the per-batch tensors along a new leading axis -> (num_batches, batch, columns)
tensor_gen_data = tf.stack(gen_data)
tensor_gen_data

<tf.Tensor: shape=(377, 64, 113), dtype=float32, numpy=
array([[[9.43852775e-03, 9.99429047e-01, 1.36207682e-05, ...,
         6.08017248e-18, 1.80283670e-11, 0.00000000e+00],
        [2.34351261e-03, 9.99787152e-01, 3.41070445e-05, ...,
         3.70969726e-17, 3.35293841e-11, 0.00000000e+00],
        [1.12576992e-03, 9.99909163e-01, 7.17231478e-06, ...,
         1.01322346e-17, 2.71866071e-11, 0.00000000e+00],
        ...,
        [2.70348927e-03, 9.99638319e-01, 6.64219851e-06, ...,
         4.03543622e-16, 1.97069982e-11, 0.00000000e+00],
        [5.30083897e-03, 9.99611318e-01, 4.61123955e-06, ...,
         5.54178883e-17, 6.08286199e-11, 0.00000000e+00],
        [5.28255012e-04, 9.99871016e-01, 2.57642514e-05, ...,
         1.73757374e-16, 5.29688134e-11, 0.00000000e+00]],

       [[9.42542031e-03, 9.99429762e-01, 1.35972896e-05, ...,
         6.07991771e-18, 1.80416793e-11, 0.00000000e+00],
        [2.35152198e-03, 9.99786437e-01, 3.42529711e-05, ...,
         3.70932933e-17, 3.

In [26]:
# Split the generated columns along the last axis:
# everything but the last two -> x_gen, second-to-last -> y_gen, last -> a_gen.
x_gen = tensor_gen_data[..., :-2]
y_gen = tensor_gen_data[..., -2]
a_gen = tensor_gen_data[..., -1]

In [27]:
#x_gen

In [28]:
# Total number of generated rows = number of batches * rows per batch.
# The batch count is read from tensor_gen_data rather than from x_gen because
# x_gen is overwritten with its 2-D reshape in the next cell; reading x_gen here
# would give a wrong count if this cell were re-run afterwards.
num_batches = tensor_gen_data.shape[0]
new_shape = num_batches * batch_size

In [29]:
# Flatten the batch axis so each generated tensor becomes (new_shape, width).
# y_gen and a_gen were sliced as single columns, so this assumes
# ydim == adim == 1 — TODO confirm.
x_gen, y_gen, a_gen = (
    tf.reshape(part, (new_shape, width))
    for part, width in ((x_gen, xdim), (y_gen, ydim), (a_gen, adim))
)

In [30]:
a_gen

<tf.Tensor: shape=(24128, 1), dtype=float32, numpy=
array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [1.],
       [0.]], dtype=float32)>

In [31]:
# Materialise the batched training set row by row as NumPy (x, y, a) triples,
# then gather each component into its own list.
unbatched_train = list(train_data.unbatch().as_numpy_iterator())

x = [row[0] for row in unbatched_train]
y = [row[1] for row in unbatched_train]
a = [row[2] for row in unbatched_train]


In [32]:
# Turn the three row lists into tensors
x, y, a = map(tf.convert_to_tensor, (x, y, a))

In [33]:
# Compare real and generated data with metrics.euclidian_dist in three ways:
# on (x, y), on (x, y, a), and on (x, y) with a passed separately (conditional=True).
real_xy = tf.concat((x, y), 1)
fake_xy = tf.concat((x_gen, y_gen), 1)

xy = metrics.euclidian_dist(real_xy, fake_xy)
print(xy)

# NOTE(review): the recorded xy and xya values are identical. Presumably a_gen
# is just the real A passed through the generator, so the extra column adds no
# distance — confirm.
real_xya = tf.concat((x, y, a), 1)
fake_xya = tf.concat((x_gen, y_gen, a_gen), 1)
xya = metrics.euclidian_dist(real_xya, fake_xya)
print(xya)

cond_a0, cond_a1 = metrics.euclidian_dist((real_xy, a), (fake_xy, a_gen), conditional=True)
print(cond_a0, cond_a1)

tf.Tensor(510.73625197831996, shape=(), dtype=float64)
tf.Tensor(510.73625197831996, shape=(), dtype=float64)
tf.Tensor(423.87814709518045, shape=(), dtype=float64) tf.Tensor(284.9189981380997, shape=(), dtype=float64)
