## Import libs

In [1]:
from math import sqrt, isnan
from pathlib import Path

import tensorflow as tf
from tensorflow.keras.optimizers import Adam, Adagrad
from tensorflow.data import Dataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from util import metrics
from util.load_data import load_data
from util.evaluation import *

In [2]:
from experiment.models import Generator2
from experiment.models import Discriminator2
from experiment.learning import *

## Preliminaries

In [3]:
# Training hyper-parameters shared by the data pipeline and the training loop.
batch_size = 64
epochs = 10

# Seed NumPy and TensorFlow so that weight initialisation and the noise drawn
# with tf.random.normal are repeatable across "Restart & Run All".
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

In [4]:
# Adam settings shared by the generator and discriminator optimizers.
learning_rate = 0.0001
beta_1 = 0.5
gen_opt = Adam(learning_rate=learning_rate, beta_1=beta_1)
disc_opt = Adam(learning_rate=learning_rate, beta_1=beta_1)
# NOTE(review): d2_opt does not pass beta_1, so it uses Adam's default, unlike
# the two optimizers above — confirm this is intended.
d2_opt = Adam(learning_rate=learning_rate)

## Load data

In [5]:
# Load the training split of the 'adult' dataset.
# Presumably x = features, y = labels, a = protected attribute — confirm against util.load_data.
x_train, y_train, a_train = load_data('adult', 'train')
# Keep references to the arrays exactly as they were loaded.
raw_data = (x_train, y_train, a_train)

In [6]:
# Inspect the last column of x_train (the column removed from X further down).
x_train[:,-1]

array([0., 0., 0., ..., 1., 0., 0.])

In [7]:
# For the FairGAN model, the feature matrix X must not include the protected attribute.
# Slice from the untouched array kept in raw_data so that re-running this cell
# never strips a second column.
x_train = raw_data[0][:, :-1]

In [8]:
#a_train.reshape(24128,)

In [9]:
#any(x_train[:,-1] - a_train.reshape(24128,))

In [10]:
# Column counts (second axis) of the feature, label and protected-attribute arrays.
xdim = x_train.shape[1]
ydim = y_train.shape[1]
adim = a_train.shape[1]
# Width of the noise vector that is sampled and fed to the generator.
noise_dim = xdim + ydim

In [11]:
# Number of feature columns in x_train.
xdim

111

In [12]:
# First row of the training features.
x_train[0]

array([0.        , 0.        , 0.        , 1.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 1.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 1.        , 0.        ,
       0.        , 0.        , 1.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 1.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 0.37901745, 0.39795918,
       0.        , 0.        , 0.        , 0.        , 0.     

In [13]:
# First row of y_train.
y_train[0]

array([0.])

In [14]:
# First row of a_train.
a_train[0]

array([0.])

In [15]:
# Pair every feature row with its y and a rows, then group the rows into
# fixed-size batches; an incomplete final batch is discarded.
train_data = (
    Dataset.from_tensor_slices((x_train, y_train, a_train))
    .batch(batch_size, drop_remainder=True)
)
train_data

<BatchDataset shapes: ((64, 111), (64, 1), (64, 1)), types: (tf.float64, tf.float64, tf.float64)>

In [16]:
# Validation split, prepared the same way as the training split:
# drop the last feature column, then batch.
x_valid, y_valid, a_valid = load_data('adult', 'valid')
x_valid = x_valid[:, :-1]

valid_data = (
    Dataset.from_tensor_slices((x_valid, y_valid, a_valid))
    .batch(batch_size, drop_remainder=True)
)

In [17]:
# Test split. The last feature column is dropped here too, as is done for the
# train and validation splits, so all three splits have the same feature width.
# NOTE(review): assumes the test split has the same column layout as the
# training split — confirm against util.load_data.
x_test, y_test, a_test = load_data('adult', 'test')
x_test = x_test[:, :-1]

test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
test_data = test_data.batch(batch_size, drop_remainder=True)

# Aqui estou fazendo testes para ver se consigo construir uma FairGAN, construindo-a aos poucos

In [18]:
# Build a generator/discriminator pair sized from the data dimensions.
gen = Generator2(xdim, ydim, adim)
disc = Discriminator2(xdim, ydim, adim)

# Use the notebook-level `epochs` setting instead of repeating the literal, so
# the configuration cell is the single place to change the number of epochs.
# NOTE(review): d2_opt is created earlier in the notebook but None is passed
# here — confirm that is intended.
train_loop(
    gen, disc, train_data, batch_size, noise_dim,
    epochs=epochs, gen_opt=gen_opt, disc_opt=disc_opt, d2_opt=None,
)

> Epoch | G Loss | Disc Loss
> 1 | 0.59644615650177 | 1.4046815633773804
> 2 | 0.667804479598999 | 1.3869768381118774
> 3 | 0.6772410273551941 | 1.3865547180175781
> 4 | 0.6814163327217102 | 1.3864340782165527
> 5 | 0.683863639831543 | 1.3863815069198608
> 6 | 0.6855032444000244 | 1.3863544464111328
> 7 | 0.6866911053657532 | 1.3863357305526733
> 8 | 0.6875976920127869 | 1.386326789855957
> 9 | 0.6883259415626526 | 1.3863178491592407
> 10 | 0.6889105439186096 | 1.38631272315979


In [19]:
# Collect one generator output per batch of the training set.
gen_data = []

# Only the A component of each real batch is used; X and Y are ignored here.
for X, Y, A in train_data:
    # Fresh noise of shape (batch_size, noise_dim) for every batch.
    noise = tf.random.normal([batch_size, noise_dim])
    batch_gen_data = gen(noise, A, batch_size)
    gen_data.append(batch_gen_data)

In [20]:
# Display the list of generated batches.
gen_data

[<tf.Tensor: shape=(64, 113), dtype=float32, numpy=
 array([[0.00467438, 0.03349633, 0.00856027, ..., 0.00207525, 0.0821316 ,
         0.        ],
        [0.00198299, 0.04564368, 0.02964945, ..., 0.0049807 , 0.05528092,
         0.        ],
        [0.00297159, 0.05067035, 0.01360709, ..., 0.00240213, 0.0459578 ,
         0.        ],
        ...,
        [0.00173398, 0.04284191, 0.01777053, ..., 0.00235504, 0.03303567,
         0.        ],
        [0.00228127, 0.04890581, 0.01102813, ..., 0.00336309, 0.02992428,
         0.        ],
        [0.0028652 , 0.09524665, 0.02241669, ..., 0.00100387, 0.04820903,
         0.        ]], dtype=float32)>,
 <tf.Tensor: shape=(64, 113), dtype=float32, numpy=
 array([[0.00467738, 0.03350744, 0.0085643 , ..., 0.00207667, 0.08216073,
         0.        ],
        [0.00198296, 0.04564336, 0.02964913, ..., 0.00498062, 0.05528044,
         0.        ],
        [0.0029667 , 0.05062821, 0.0135908 , ..., 0.00239795, 0.04591453,
         1.        ],
 

In [21]:
# Pack the list of per-batch tensors along a new leading axis.
tensor_gen_data = tf.stack(gen_data, axis=0)
tensor_gen_data

<tf.Tensor: shape=(377, 64, 113), dtype=float32, numpy=
array([[[0.00467438, 0.03349633, 0.00856027, ..., 0.00207525,
         0.0821316 , 0.        ],
        [0.00198299, 0.04564368, 0.02964945, ..., 0.0049807 ,
         0.05528092, 0.        ],
        [0.00297159, 0.05067035, 0.01360709, ..., 0.00240213,
         0.0459578 , 0.        ],
        ...,
        [0.00173398, 0.04284191, 0.01777053, ..., 0.00235504,
         0.03303567, 0.        ],
        [0.00228127, 0.04890581, 0.01102813, ..., 0.00336309,
         0.02992428, 0.        ],
        [0.0028652 , 0.09524665, 0.02241669, ..., 0.00100387,
         0.04820903, 0.        ]],

       [[0.00467738, 0.03350744, 0.0085643 , ..., 0.00207667,
         0.08216073, 0.        ],
        [0.00198296, 0.04564336, 0.02964913, ..., 0.00498062,
         0.05528044, 0.        ],
        [0.0029667 , 0.05062821, 0.0135908 , ..., 0.00239795,
         0.04591453, 1.        ],
        ...,
        [0.00173379, 0.0428395 , 0.01776912, ..., 0.

In [22]:
# Split along the trailing axis: all but the final two columns go to x_gen,
# the second-to-last column to y_gen and the last column to a_gen.
x_gen = tensor_gen_data[..., :-2]
y_gen = tensor_gen_data[..., -2]
a_gen = tensor_gen_data[..., -1]

In [23]:
#x_gen

In [24]:
# Leading axis of x_gen is the number of generated batches.
num_batches = x_gen.shape[0]
# Total number of generated rows once the batch axis is flattened.
new_shape = num_batches*batch_size

In [25]:
# Merge the batch axis into the row axis so each tensor is (rows, columns).
x_gen = tf.reshape(x_gen, (new_shape, xdim))
y_gen = tf.reshape(y_gen, (new_shape, ydim))
a_gen = tf.reshape(a_gen, (new_shape, adim))

In [26]:
# Display the flattened a_gen tensor.
a_gen

<tf.Tensor: shape=(24128, 1), dtype=float32, numpy=
array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [1.],
       [0.]], dtype=float32)>

In [27]:
# Undo the batching and materialise every (x, y, a) row as NumPy values.
unbatched_train = list(train_data.unbatch().as_numpy_iterator())

# One list per component, in dataset order.
x = [row[0] for row in unbatched_train]
y = [row[1] for row in unbatched_train]
a = [row[2] for row in unbatched_train]


In [28]:
# Turn each list of rows into a single tensor.
x, y, a = (tf.convert_to_tensor(rows) for rows in (x, y, a))

In [29]:
def euclidian_dist(real_data, gen_data, conditional=False):
    """Euclidean distance between real and generated samples.

    Both inputs are cast to float64. When they have a different number of
    rows, the longer one is truncated to the length of the shorter one. The
    Euclidean norm of the element-wise difference is returned.

    Args:
        real_data: tensor-like of real samples. When ``conditional`` is true
            it is forwarded unchanged to ``metrics.euclidian_dist``, which
            this notebook calls with a ``(samples, a)`` pair.
        gen_data: generated samples, in the same layout as ``real_data``.
        conditional: when true, delegate to
            ``metrics.euclidian_dist(..., conditional=True)`` instead of
            silently ignoring the flag.

    Returns:
        A scalar float64 tensor, or, for the conditional case, whatever
        ``metrics.euclidian_dist`` returns (the notebook unpacks it into two
        values).
    """
    if conditional:
        # The conditional variant that used to live here depended on a
        # `filtering` helper whose definition is not visible in this notebook,
        # so reuse the library implementation the notebook already calls.
        return metrics.euclidian_dist(real_data, gen_data, conditional=True)

    real_data = tf.cast(real_data, tf.double)
    gen_data = tf.cast(gen_data, tf.double)

    # Compare only the rows that both tensors have.
    n_rows = min(real_data.shape[0], gen_data.shape[0])
    return tf.norm(real_data[:n_rows] - gen_data[:n_rows], ord='euclidean')

In [30]:
# Build the (x, y) concatenations once and reuse them in all three comparisons.
real_xy = tf.concat((x, y), 1)
gen_xy = tf.concat((x_gen, y_gen), 1)

# Distance over features and labels.
xy = metrics.euclidian_dist(real_xy, gen_xy)
print(xy)

# Distance with the a column appended as well.
xya = metrics.euclidian_dist(tf.concat((x, y, a), 1), tf.concat((x_gen, y_gen, a_gen), 1))
print(xya)

# Conditional variant: returns one distance per value of a.
cond_a0, cond_a1 = metrics.euclidian_dist((real_xy, a), (gen_xy, a_gen), conditional=True)
print(cond_a0, cond_a1)

tf.Tensor(382.40770218284104, shape=(), dtype=float64)
tf.Tensor(382.40770218284104, shape=(), dtype=float64)
tf.Tensor(308.0703674682468, shape=(), dtype=float64) tf.Tensor(226.55749684514927, shape=(), dtype=float64)
