## Import libs

In [1]:
from math import sqrt, isnan
from pathlib import Path

import tensorflow as tf
from tensorflow.keras.optimizers import Adam, Adagrad
from tensorflow.data import Dataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from util import metrics
from util.load_data import load_data
from util.evaluation import *

In [2]:
from experiment.models import Generator2
from experiment.models import Discriminator2
from experiment.learning import *

## Preliminaries

In [3]:
# Global training hyperparameters shared by the cells below.
batch_size = 64
epochs = 10

# Seed the NumPy and TensorFlow global RNGs so that stochastic steps (for
# example the tf.random.normal noise drawn further down) are repeatable
# after Restart Kernel -> Run All.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

# Unused autoencoder optimizer settings, kept for reference:
#learning_rate = 0.001
#ae_opt = Adam(learning_rate=learning_rate)

In [4]:
# Optimizer settings for the adversarial training.
learning_rate = 1e-4
beta_1 = 0.5

# Generator and discriminator get identically configured, independent Adam instances.
gen_opt, disc_opt = (
    Adam(learning_rate=learning_rate, beta_1=beta_1) for _ in range(2)
)

# The second discriminator's optimizer keeps Adam's default beta_1.
d2_opt = Adam(learning_rate=learning_rate)

## Load data

In [5]:
# Load the training split of the 'adult' dataset and keep the untouched
# (x, y, a) triple in raw_data before any column is dropped from x_train.
raw_data = tuple(load_data('adult', 'train'))
x_train, y_train, a_train = raw_data

In [6]:
# Peek at the last column of x_train (the column that is dropped in the next cell).
x_train[:,-1]

array([0., 0., 0., ..., 1., 0., 0.])

In [7]:
# For the FairGAN model the feature matrix X must not include the protected
# attribute, so the last column of x is dropped here.
# Slice from the untouched array kept in raw_data (rather than from x_train
# itself) so that re-running this cell does not strip one more feature column
# each time.
x_train = raw_data[0][:, :-1]

In [8]:
#a_train.reshape(24128,)

In [9]:
#any(x_train[:,-1] - a_train.reshape(24128,))

In [10]:
# Column counts of the feature, label and protected-attribute arrays.
xdim, ydim, adim = (
    array.shape[1] for array in (x_train, y_train, a_train)
)

# The generator's noise vector is twice as wide as a concatenated (x, y) row.
noise_dim = 2 * (xdim + ydim)

In [11]:
# Number of columns in x_train (set in the previous cell).
xdim

111

In [12]:
# Feature vector of the first training example.
x_train[0]

array([0.        , 0.        , 0.        , 1.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 1.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 1.        , 0.        ,
       0.        , 0.        , 1.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 1.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 0.37901745, 0.39795918,
       0.        , 0.        , 0.        , 0.        , 0.     

In [13]:
# Label of the first training example.
y_train[0]

array([0.])

In [14]:
# Protected-attribute value of the first training example.
a_train[0]

array([0.])

In [15]:
# Wrap the training arrays in a tf.data pipeline of fixed-size batches;
# drop_remainder=True discards the final partial batch so every batch has
# exactly batch_size rows.
train_data = (
    Dataset
    .from_tensor_slices((x_train, y_train, a_train))
    .batch(batch_size, drop_remainder=True)
)
train_data

<BatchDataset shapes: ((64, 111), (64, 1), (64, 1)), types: (tf.float64, tf.float64, tf.float64)>

In [16]:
# Validation split: load it, drop the last column of x (as done for the
# training features) and batch it the same way as the training data.
x_valid, y_valid, a_valid = load_data('adult', 'valid')
x_valid = x_valid[..., :-1]

valid_data = (
    Dataset
    .from_tensor_slices((x_valid, y_valid, a_valid))
    .batch(batch_size, drop_remainder=True)
)

In [17]:
# Test split: load it and drop the last column of x, exactly as is done for
# the train and valid splits, so that the test features have the same width
# (xdim) as the data the models are built for.
x_test, y_test, a_test = load_data('adult', 'test')
x_test = x_test[:,:-1]

# Fixed-size batches; the final partial batch is discarded.
test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
test_data = test_data.batch(batch_size, drop_remainder=True)

In [18]:
# Show the noise, feature and label widths, one value per line.
print(noise_dim, xdim, ydim, sep='\n')

224
111
1


# Testing whether a FairGAN can be built up incrementally, piece by piece

In [19]:
# Build a fresh generator/discriminator pair from the data dimensions.
gen = Generator2(xdim, ydim, adim, noise_dim)
disc = Discriminator2(xdim, ydim, adim)

# Train for the number of epochs set in the Preliminaries cell instead of a
# second hard-coded 10, so the value only has to be changed in one place.
# d2_opt is passed as None even though a d2_opt optimizer was created above;
# presumably the second discriminator is not trained at this stage - TODO confirm.
train_loop(gen, disc, train_data, batch_size, noise_dim, epochs=epochs,
           gen_opt=gen_opt, disc_opt=disc_opt, d2_opt=None)

> Epoch | G Loss | Disc Loss
> 1 | 0.654364287853241 | 1.3896657228469849
> 2 | 0.6869360208511353 | 1.3862919807434082
> 3 | 0.6903964877128601 | 1.3863269090652466
> 4 | 0.6919848918914795 | 1.3863221406936646
> 5 | 0.6922826766967773 | 1.3862863779067993
> 6 | 0.692068874835968 | 1.386294960975647
> 7 | 0.6921754479408264 | 1.3862910270690918
> 8 | 0.6927060484886169 | 1.3863011598587036
> 9 | 0.6924933791160583 | 1.3862911462783813
> 10 | 0.6926924586296082 | 1.3863019943237305


In [20]:
# Produce one generated batch per real training batch: fresh Gaussian noise
# plus that batch's real protected attribute A are passed to the generator.
gen_data = []

for X, Y, A in train_data:
    noise = tf.random.normal(shape=(batch_size, noise_dim))
    gen_data.append(gen(noise, A, batch_size))

In [21]:
# Display the list of generated batches (one tensor per training batch).
# NOTE(review): this prints every batch; showing len(gen_data) or gen_data[0]
# would keep the notebook output short.
gen_data

[<tf.Tensor: shape=(64, 113), dtype=float32, numpy=
 array([[0.00380155, 0.02529562, 0.41419286, ..., 0.00140241, 0.0582792 ,
         0.        ],
        [0.00368024, 0.02563505, 0.4268849 , ..., 0.00145478, 0.05741033,
         0.        ],
        [0.00374061, 0.02575013, 0.41903612, ..., 0.00141349, 0.05699494,
         0.        ],
        ...,
        [0.00365144, 0.02553005, 0.42148983, ..., 0.00140802, 0.05617594,
         0.        ],
        [0.00370534, 0.02574036, 0.41662163, ..., 0.0014347 , 0.05606757,
         0.        ],
        [0.00373912, 0.02646632, 0.423686  , ..., 0.00136728, 0.05712705,
         0.        ]], dtype=float32)>,
 <tf.Tensor: shape=(64, 113), dtype=float32, numpy=
 array([[0.00379997, 0.02528866, 0.41417927, ..., 0.00140174, 0.05826678,
         0.        ],
        [0.00369259, 0.02569213, 0.4269958 , ..., 0.00146046, 0.05750961,
         0.        ],
        [0.0037394 , 0.02574462, 0.41902545, ..., 0.00141296, 0.05698545,
         1.        ],
 

In [22]:
# First generated row of the first generated batch.
gen_data[0][0]

<tf.Tensor: shape=(113,), dtype=float32, numpy=
array([0.00380155, 0.02529562, 0.41419286, 0.6829809 , 0.68882674,
       0.04594617, 0.0164199 , 0.02391359, 0.00932825, 0.00672019,
       0.0045563 , 0.00402961, 0.01180242, 0.00196223, 0.00151682,
       0.00173714, 0.00250654, 0.00208097, 0.00965983, 0.0405758 ,
       0.02857192, 0.00192028, 0.07605582, 0.0099799 , 0.00146257,
       0.00233815, 0.04115706, 0.0060646 , 0.010613  , 0.00136518,
       0.8834656 , 0.01736876, 0.61010855, 0.10032348, 0.00140562,
       0.5335041 , 0.00136151, 0.91729975, 0.00194563, 0.04694247,
       0.00588282, 0.00428321, 0.03597967, 0.00140617, 0.46668682,
       0.5005193 , 0.00829642, 0.02230514, 0.01105562, 0.01859928,
       0.00155576, 0.6539421 , 0.00279999, 0.02234932, 0.00620418,
       0.01016538, 0.8983135 , 0.5727688 , 0.00471483, 0.04668678,
       0.01946559, 0.0171963 , 0.00172433, 0.00463216, 0.01887558,
       0.0016254 , 0.97683525, 0.00191384, 0.00274095, 0.10424429,
       0.00137

In [23]:
# Stack the per-batch tensors along a new leading axis, giving a single
# tensor indexed as (batch, row, column).
tensor_gen_data = tf.stack(gen_data, axis=0)
tensor_gen_data

<tf.Tensor: shape=(377, 64, 113), dtype=float32, numpy=
array([[[0.00380155, 0.02529562, 0.41419286, ..., 0.00140241,
         0.0582792 , 0.        ],
        [0.00368024, 0.02563505, 0.4268849 , ..., 0.00145478,
         0.05741033, 0.        ],
        [0.00374061, 0.02575013, 0.41903612, ..., 0.00141349,
         0.05699494, 0.        ],
        ...,
        [0.00365144, 0.02553005, 0.42148983, ..., 0.00140802,
         0.05617594, 0.        ],
        [0.00370534, 0.02574036, 0.41662163, ..., 0.0014347 ,
         0.05606757, 0.        ],
        [0.00373912, 0.02646632, 0.423686  , ..., 0.00136728,
         0.05712705, 0.        ]],

       [[0.00379997, 0.02528866, 0.41417927, ..., 0.00140174,
         0.05826678, 0.        ],
        [0.00369259, 0.02569213, 0.4269958 , ..., 0.00146046,
         0.05750961, 0.        ],
        [0.0037394 , 0.02574462, 0.41902545, ..., 0.00141296,
         0.05698545, 1.        ],
        ...,
        [0.00366111, 0.02557496, 0.4215772 , ..., 0.

In [24]:
# Split the generated rows along the last axis: everything except the final
# two columns is x, the second-to-last column is y, and the last column is a.
x_gen = tensor_gen_data[..., :-2]
y_gen = tensor_gen_data[..., -2]
a_gen = tensor_gen_data[..., -1]

In [25]:
#x_gen

In [26]:
# Total number of generated rows = number of batches * rows per batch.
# The batch count is read from tensor_gen_data (not reassigned in the cells
# shown) rather than from x_gen: x_gen is overwritten by the reshape in the
# next cell, so re-running this cell afterwards would otherwise take the
# flattened row count as the number of batches.
num_batches = tensor_gen_data.shape[0]
new_shape = num_batches*batch_size

In [27]:
# Collapse the (batch, row) axes into a single sample axis so that each
# generated array is 2-D with one row per sample.
x_gen, y_gen, a_gen = (
    tf.reshape(tensor, shape=(new_shape, width))
    for tensor, width in ((x_gen, xdim), (y_gen, ydim), (a_gen, adim))
)

In [28]:
# Protected-attribute column of the generated data after the reshape above.
a_gen

<tf.Tensor: shape=(24128, 1), dtype=float32, numpy=
array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [1.],
       [0.]], dtype=float32)>

In [29]:
# Undo the batching and materialise every (x, y, a) row as NumPy values, so
# the real data contains exactly the rows that were fed through train_data
# (the dropped remainder is excluded).
unbatched_train = list(train_data.unbatch().as_numpy_iterator())

# Separate the row triples into three parallel lists.
x = [sample[0] for sample in unbatched_train]
y = [sample[1] for sample in unbatched_train]
a = [sample[2] for sample in unbatched_train]


In [30]:
# Turn the three per-row lists into tensors, in x, y, a order.
x, y, a = (tf.convert_to_tensor(rows) for rows in (x, y, a))

In [31]:
# Compare real and generated data with metrics.euclidian_dist.
# NOTE(review): the recorded xy and xya outputs below are identical even
# though a and a_gen are added for xya - worth checking euclidian_dist.
real_xy = tf.concat((x, y), 1)
fake_xy = tf.concat((x_gen, y_gen), 1)

# Distance over the joint (x, y) columns.
xy = metrics.euclidian_dist(real_xy, fake_xy)
print(xy)

# Distance over the joint (x, y, a) columns.
xya = metrics.euclidian_dist(
    tf.concat((x, y, a), 1),
    tf.concat((x_gen, y_gen, a_gen), 1),
)
print(xya)

# conditional=True returns a pair of values, unpacked as cond_a0 / cond_a1.
cond_a0, cond_a1 = metrics.euclidian_dist(
    (real_xy, a),
    (fake_xy, a_gen),
    conditional=True,
)
print(cond_a0, cond_a1)

tf.Tensor(430.7200494384954, shape=(), dtype=float64)
tf.Tensor(430.7200494384954, shape=(), dtype=float64)
tf.Tensor(341.85767907791427, shape=(), dtype=float64) tf.Tensor(262.01734340261083, shape=(), dtype=float64)
