## Import libs

In [1]:
from math import sqrt, isnan
from pathlib import Path

import tensorflow as tf
from tensorflow.keras.optimizers import Adam, Adagrad
from tensorflow.data import Dataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from util import metrics
from util.load_data import load_data
from util.evaluation import *

In [2]:
from experiment.models import Generator2
from experiment.models import Discriminator2
from experiment.learning import *

## Preliminaries

In [3]:
# Training configuration shared by the data pipelines and the training loop.
batch_size, epochs = 64, 10
# Earlier autoencoder settings, kept for reference only:
#   learning_rate = 0.001
#   ae_opt = Adam(learning_rate=learning_rate)

In [4]:
# Optimizer hyper-parameters.
learning_rate = 1e-4
beta_1 = 0.5

# Generator and discriminator use the same Adam settings.
adam_kwargs = dict(learning_rate=learning_rate, beta_1=beta_1)
gen_opt = Adam(**adam_kwargs)
disc_opt = Adam(**adam_kwargs)

# The second discriminator's optimizer only sets the learning rate; beta_1 is
# left at Adam's default.
d2_opt = Adam(learning_rate=learning_rate)

## Load data

In [5]:
# Load the training split of the 'adult' dataset and keep an untouched
# reference to the three arrays before any column is dropped.
raw_data = tuple(load_data('adult', 'train'))
x_train, y_train, a_train = raw_data

In [6]:
# Peek at the last feature column before it is removed from x_train.
# Read it from raw_data so the cell shows the same column even when it is
# re-run after x_train has been sliced.
raw_data[0][:, -1]

array([0., 0., 0., ..., 1., 0., 0.])

In [7]:
# For the FairGAN model the feature matrix X must not include the protected
# attribute, so its last column is dropped.
# Slice from the untouched raw_data copy rather than from x_train itself:
# re-running this cell then always yields the same matrix instead of
# stripping one more column on every execution.
x_train = raw_data[0][:, :-1]

In [8]:
#a_train.reshape(24128,)

In [9]:
#any(x_train[:,-1] - a_train.reshape(24128,))

In [10]:
# Column counts of the feature, label and protected-attribute arrays.
xdim, ydim, adim = (arr.shape[1] for arr in (x_train, y_train, a_train))

# The generator's noise vector is twice as wide as features + label.
noise_dim = 2 * (xdim + ydim)

In [11]:
# Show the number of feature columns (x_train.shape[1]).
xdim

111

In [12]:
# Inspect the first row of the training features.
x_train[0]

array([0.        , 0.        , 0.        , 1.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 1.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 1.        , 0.        ,
       0.        , 0.        , 1.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 1.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 0.37901745, 0.39795918,
       0.        , 0.        , 0.        , 0.        , 0.     

In [13]:
# Inspect the first row of y_train.
y_train[0]

array([0.])

In [14]:
# Inspect the first row of a_train.
a_train[0]

array([0.])

In [15]:
# Build the training pipeline: one (x, y, a) triple per example, grouped into
# fixed-size batches. drop_remainder=True discards the final partial batch so
# every batch has exactly batch_size rows.
train_data = (
    Dataset.from_tensor_slices((x_train, y_train, a_train))
    .batch(batch_size, drop_remainder=True)
)
train_data

<BatchDataset shapes: ((64, 111), (64, 1), (64, 1)), types: (tf.float64, tf.float64, tf.float64)>

In [16]:
# Load the validation split and drop the last feature column, as is done for
# the training features.
valid_features, y_valid, a_valid = load_data('adult', 'valid')
x_valid = valid_features[:, :-1]

# Same batching as the training pipeline (partial final batch discarded).
valid_data = (
    Dataset.from_tensor_slices((x_valid, y_valid, a_valid))
    .batch(batch_size, drop_remainder=True)
)

In [17]:
x_test, y_test, a_test = load_data('adult', 'test')
# Drop the last feature column, exactly as is done for the train and
# validation splits, so x_test has the same width (xdim) as x_train and does
# not carry the protected attribute inside the features.
x_test = x_test[:, :-1]

# Same batching as the other splits (partial final batch discarded).
test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
test_data = test_data.batch(batch_size, drop_remainder=True)

In [18]:
# Print the noise, feature and label widths, one per line.
print(noise_dim, xdim, ydim, sep='\n')

224
111
1


# Testing whether a FairGAN can be built up incrementally, piece by piece

In [19]:
# Fresh generator / discriminator pair sized from the data dimensions.
gen = Generator2(xdim, ydim, adim, noise_dim)
disc = Discriminator2(xdim, ydim, adim)

# Train for the number of epochs set in the Preliminaries section instead of
# repeating the literal here. d2_opt is passed as None, so the d2_opt
# optimizer created earlier is not used in this run.
# NOTE(review): the saved output below lists 15 epochs, which does not match
# epochs = 10 -- it looks stale; re-run after a kernel restart to refresh it.
train_loop(gen, disc, train_data, batch_size, noise_dim,
           epochs=epochs, gen_opt=gen_opt, disc_opt=disc_opt, d2_opt=None)

> Epoch | G Loss | Disc Loss
> 1 | 0.6943623423576355 | 1.3832975625991821
> 2 | 0.6932942867279053 | 1.386202096939087
> 3 | 0.6930084824562073 | 1.3863736391067505
> 4 | 0.6925643682479858 | 1.386659860610962
> 5 | 0.6931954026222229 | 1.386284351348877
> 6 | 0.6927180290222168 | 1.3865660429000854
> 7 | 0.6929154396057129 | 1.3864606618881226
> 8 | 0.692470908164978 | 1.386707067489624
> 9 | 0.6941415071487427 | 1.3857940435409546
> 10 | 0.6900399923324585 | 1.3870519399642944
> 11 | 0.6927889585494995 | 1.3858479261398315
> 12 | 0.6964219212532043 | 1.3859388828277588
> 13 | 0.6940259337425232 | 1.3858059644699097
> 14 | 0.6901366710662842 | 1.3818204402923584
> 15 | 0.7006853222846985 | 1.3881375789642334


In [20]:
# Produce one synthetic batch per real training batch. Only the A component
# of each real batch is passed to the generator, together with a fresh
# standard-normal noise sample of shape [batch_size, noise_dim].
gen_data = [
    gen(tf.random.normal([batch_size, noise_dim]), A, batch_size)
    for _, _, A in train_data
]

In [21]:
# Show the generated batches; each list entry is the generator output for one
# training batch.
gen_data

[<tf.Tensor: shape=(64, 113), dtype=float32, numpy=
 array([[0.02032302, 0.02632744, 0.01515298, ..., 0.00023927, 0.02558941,
         0.        ],
        [0.01969467, 0.02668619, 0.0159427 , ..., 0.00024831, 0.02520368,
         0.        ],
        [0.01998892, 0.02677994, 0.01544182, ..., 0.00024083, 0.02499557,
         0.        ],
        ...,
        [0.01957966, 0.02661485, 0.01561656, ..., 0.00024101, 0.02468234,
         0.        ],
        [0.01975857, 0.0267382 , 0.015308  , ..., 0.00024409, 0.0245639 ,
         0.        ],
        [0.01993613, 0.02748615, 0.01574409, ..., 0.00023261, 0.02503797,
         0.        ]], dtype=float32)>,
 <tf.Tensor: shape=(64, 113), dtype=float32, numpy=
 array([[2.0316364e-02, 2.6319481e-02, 1.5147700e-02, ..., 2.3910962e-04,
         2.5581498e-02, 0.0000000e+00],
        [1.9693980e-02, 2.6685294e-02, 1.5942095e-02, ..., 2.4829403e-04,
         2.5202839e-02, 0.0000000e+00],
        [1.9985016e-02, 2.6775122e-02, 1.5438629e-02, ..., 2.

In [22]:
# Inspect the first generated row of the first batch.
gen_data[0][0]

<tf.Tensor: shape=(113,), dtype=float32, numpy=
array([2.03230195e-02, 2.63274368e-02, 1.51529778e-02, 1.60600189e-02,
       2.17101965e-02, 1.57096367e-02, 1.49020795e-02, 2.30021533e-02,
       9.39650927e-03, 3.91490012e-03, 2.58817039e-02, 2.13951692e-02,
       1.27873952e-02, 1.26506472e-02, 1.03436876e-02, 5.58997272e-03,
       2.83836457e-03, 2.92649847e-02, 6.61882525e-03, 1.93823371e-02,
       5.90848178e-02, 1.20942490e-02, 3.91089395e-02, 6.40565678e-02,
       2.54825340e-04, 1.84802141e-03, 2.80613117e-02, 3.13183852e-02,
       2.23791040e-02, 3.96334828e-04, 9.70057726e-01, 8.01158417e-03,
       1.45699698e-02, 1.85429845e-02, 2.92220240e-04, 2.28132010e-02,
       3.99849552e-04, 6.47081435e-02, 7.62248691e-03, 5.93818426e-02,
       3.02265789e-02, 2.50490215e-02, 1.88643504e-02, 2.05791905e-03,
       1.92660913e-02, 2.35583391e-02, 4.90738917e-03, 2.45103389e-02,
       1.55053157e-02, 1.77778751e-02, 1.64209232e-02, 1.47674065e-02,
       6.78319484e-03, 2.2647

In [23]:
# Pack the list of per-batch generator outputs into a single tensor; the
# leading axis indexes batches (the saved output shows shape (377, 64, 113)).
tensor_gen_data = tf.convert_to_tensor(gen_data)
tensor_gen_data

<tf.Tensor: shape=(377, 64, 113), dtype=float32, numpy=
array([[[2.03230195e-02, 2.63274368e-02, 1.51529778e-02, ...,
         2.39270623e-04, 2.55894139e-02, 0.00000000e+00],
        [1.96946710e-02, 2.66861878e-02, 1.59427002e-02, ...,
         2.48312019e-04, 2.52036843e-02, 0.00000000e+00],
        [1.99889205e-02, 2.67799441e-02, 1.54418247e-02, ...,
         2.40834299e-04, 2.49955729e-02, 0.00000000e+00],
        ...,
        [1.95796583e-02, 2.66148522e-02, 1.56165631e-02, ...,
         2.41008209e-04, 2.46823374e-02, 0.00000000e+00],
        [1.97585691e-02, 2.67381985e-02, 1.53080011e-02, ...,
         2.44088485e-04, 2.45638974e-02, 0.00000000e+00],
        [1.99361332e-02, 2.74861492e-02, 1.57440919e-02, ...,
         2.32612743e-04, 2.50379685e-02, 0.00000000e+00]],

       [[2.03163642e-02, 2.63194814e-02, 1.51477000e-02, ...,
         2.39109620e-04, 2.55814977e-02, 0.00000000e+00],
        [1.96939800e-02, 2.66852938e-02, 1.59420948e-02, ...,
         2.48294033e-04, 2.

In [24]:
# Split the generated rows into features, label and protected attribute.
# The last axis is laid out as [x | y | a]; slice by the measured widths
# rather than hard-coded -2 / -1 indices so the split stays correct when ydim
# or adim is not 1. Range slices keep the last axis, which the later reshape
# to (rows, ydim) / (rows, adim) accepts unchanged.
x_gen = tensor_gen_data[:, :, :xdim]
y_gen = tensor_gen_data[:, :, xdim:xdim + ydim]
a_gen = tensor_gen_data[:, :, xdim + ydim:xdim + ydim + adim]

In [25]:
#x_gen

In [26]:
# Total number of generated rows once the batch axis is flattened.
# Read the batch count from tensor_gen_data, which is never reassigned,
# instead of x_gen: x_gen is overwritten by the reshape that follows, so
# re-running this cell afterwards would otherwise yield a row count that is
# batch_size times too large.
num_batches = tensor_gen_data.shape[0]
new_shape = num_batches*batch_size

In [27]:
# Flatten the (batch, row) axes so each generated array becomes a 2-D matrix
# with new_shape rows and its own column width.
x_gen, y_gen, a_gen = (
    tf.reshape(batched, shape=(new_shape, width))
    for batched, width in ((x_gen, xdim), (y_gen, ydim), (a_gen, adim))
)

In [28]:
# Inspect a_gen after the reshape to (new_shape, adim).
a_gen

<tf.Tensor: shape=(24128, 1), dtype=float32, numpy=
array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [1.],
       [0.]], dtype=float32)>

In [29]:
# Materialise the batched training set back into individual (x, y, a) rows.
# Going through train_data (built with drop_remainder=True) keeps only the
# rows that actually made it into a full batch.
unbatched_train = list(train_data.unbatch().as_numpy_iterator())

# Separate the three components of every row into parallel lists.
x = [row[0] for row in unbatched_train]
y = [row[1] for row in unbatched_train]
a = [row[2] for row in unbatched_train]


In [30]:
# Turn the three lists of per-row arrays into tensors.
x, y, a = map(tf.convert_to_tensor, (x, y, a))

In [31]:
# Joint (features, label) matrices for the real and the generated data.
real_xy = tf.concat((x, y), 1)
fake_xy = tf.concat((x_gen, y_gen), 1)

# Distance between real and generated (x, y).
xy = metrics.euclidian_dist(real_xy, fake_xy)
print(xy)

# Same comparison with the a column appended.
real_xya = tf.concat((x, y, a), 1)
fake_xya = tf.concat((x_gen, y_gen, a_gen), 1)
xya = metrics.euclidian_dist(real_xya, fake_xya)
print(xya)

# Conditional variant: each side is passed as a ((x, y), a) pair and two
# values are returned.
cond_a0, cond_a1 = metrics.euclidian_dist((real_xy, a), (fake_xy, a_gen), conditional=True)
print(cond_a0, cond_a1)

tf.Tensor(390.35729644598706, shape=(), dtype=float64)
tf.Tensor(390.35729644598706, shape=(), dtype=float64)
tf.Tensor(320.0538338735577, shape=(), dtype=float64) tf.Tensor(223.48235346768965, shape=(), dtype=float64)
