## Import libs

In [1]:
from math import sqrt, isnan
from pathlib import Path

import tensorflow as tf
from tensorflow.keras.optimizers import Adam, Adagrad
from tensorflow.data import Dataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from util import metrics
from util.load_data import load_data
from util.evaluation import *

In [2]:
from experiment.models import Generator2
from experiment.models import Discriminator2
from experiment.learning import *

## Preliminaries

In [3]:
# Mini-batch size shared by every tf.data pipeline built in this notebook.
batch_size = 64
# Number of training epochs.
epochs = 10
# Disabled alternative optimizer settings (not used anywhere below):
#learning_rate = 0.001
#ae_opt = Adam(learning_rate=learning_rate)

In [4]:
# Adam hyper-parameters shared by the generator/discriminator pair.
learning_rate = 0.0001
beta_1 = 0.5

# Two independent optimizer instances with identical settings: one for the
# generator, one for the discriminator.
gen_opt, disc_opt = (
    Adam(learning_rate=learning_rate, beta_1=beta_1) for _ in range(2)
)
# d2_opt only overrides the learning rate and keeps Adam's default beta_1.
d2_opt = Adam(learning_rate=learning_rate)

## Load data

In [5]:
# Keep the untouched training split as a tuple (raw_data) and also expose its
# three parts under individual names.
raw_data = tuple(load_data('adult', 'train'))
x_train, y_train, a_train = raw_data

In [6]:
# Peek at the last column of x_train (the column that is dropped in the next step).
x_train[:,-1]

array([0., 0., 0., ..., 1., 0., 0.])

In [7]:
# For the FairGAN model the feature vector X must not include the protected
# attribute, so the last column of the loaded features is dropped.
# The slice is taken from the untouched copy stored in raw_data, so re-running
# this cell always yields the same result instead of stripping one more
# column on every execution.
x_train = raw_data[0][:,:-1]

In [8]:
#a_train.reshape(24128,)

In [9]:
#any(x_train[:,-1] - a_train.reshape(24128,))

In [10]:
# Column counts (second dimension) of the feature, label and
# protected-attribute arrays.
xdim, ydim, adim = (arr.shape[1] for arr in (x_train, y_train, a_train))
# The noise vector fed to the generator is as wide as features plus labels.
noise_dim = xdim + ydim

In [11]:
# Number of feature columns in x_train after the last column was removed.
xdim

111

In [12]:
# First row of the training features.
x_train[0]

array([0.        , 0.        , 0.        , 1.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 1.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 1.        , 0.        ,
       0.        , 0.        , 1.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 1.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 0.37901745, 0.39795918,
       0.        , 0.        , 0.        , 0.        , 0.     

In [13]:
# y_train entry for the first training row.
y_train[0]

array([0.])

In [14]:
# a_train entry for the first training row (presumably the protected attribute — confirm against load_data).
a_train[0]

array([0.])

In [15]:
# Slice the three training arrays row-wise and group them into fixed-size
# batches; the trailing partial batch is discarded.
train_data = (
    Dataset
    .from_tensor_slices((x_train, y_train, a_train))
    .batch(batch_size, drop_remainder=True)
)
train_data

<BatchDataset shapes: ((64, 111), (64, 1), (64, 1)), types: (tf.float64, tf.float64, tf.float64)>

In [16]:
# Validation split, prepared like the training split: drop the last feature
# column, then build fixed-size batches (partial batch discarded).
x_valid, y_valid, a_valid = load_data('adult', 'valid')
x_valid = x_valid[:,:-1]

valid_data = (
    Dataset
    .from_tensor_slices((x_valid, y_valid, a_valid))
    .batch(batch_size, drop_remainder=True)
)

In [17]:
# Load the test split.
x_test, y_test, a_test = load_data('adult', 'test')
# Drop the last feature column, exactly as is done for the train and
# validation splits, so that x_test has the same width (xdim) as the data the
# models are built for. Previously the test features kept this extra column.
# NOTE(review): assumes the test split uses the same column layout as the
# train/valid splits — confirm against load_data.
x_test = x_test[:,:-1]

# Fixed-size batches; the trailing partial batch is discarded.
test_data = Dataset.from_tensor_slices((x_test, y_test, a_test))
test_data = test_data.batch(batch_size, drop_remainder=True)

# Aqui estou fazendo testes para ver se consigo construir uma FairGAN aos poucos

In [18]:
# Build the generator/discriminator pair from the feature, label and
# protected-attribute widths computed earlier.
gen = Generator2(xdim, ydim, adim)
disc = Discriminator2(xdim, ydim, adim)

# Train with the configured number of epochs rather than a repeated literal,
# so changing `epochs` in the preliminaries cell actually takes effect.
# NOTE(review): d2_opt is created in the optimizer cell but None is passed
# here — confirm this is intentional.
train_loop(
    gen, disc, raw_data, train_data, batch_size, noise_dim,
    epochs=epochs, gen_opt=gen_opt, disc_opt=disc_opt, d2_opt=None,
)

> Epoch | G Loss | Disc Loss
> 1 | 0.6878520250320435 | 1.3878567218780518
> 2 | 0.6931471824645996 | 1.3862943649291992
> 3 | 0.6931471824645996 | 1.3862943649291992


KeyboardInterrupt: 

In [None]:
# One generated batch per real training batch: fresh Gaussian noise is drawn
# for each batch and the generator is conditioned on that batch's A values.
gen_data = [
    gen(tf.random.normal([batch_size, noise_dim]), A, batch_size)
    for _, _, A in train_data
]

In [None]:
# Inspect the generated batches (a Python list holding one tensor per batch).
gen_data

[<tf.Tensor: shape=(64, 113), dtype=float32, numpy=
 array([[0.02038999, 0.04034507, 0.03090917, ..., 0.01928696, 0.28196585,
         0.        ],
        [0.02040375, 0.04036696, 0.03092775, ..., 0.01930024, 0.2819994 ,
         0.        ],
        [0.02038412, 0.04033513, 0.03090123, ..., 0.01928123, 0.28193557,
         0.        ],
        ...,
        [0.02043637, 0.0404183 , 0.03097099, ..., 0.01933149, 0.28206384,
         0.        ],
        [0.02038673, 0.04033995, 0.03090497, ..., 0.01928386, 0.28195822,
         0.        ],
        [0.02043272, 0.04041271, 0.03096624, ..., 0.019328  , 0.28207034,
         0.        ]], dtype=float32)>,
 <tf.Tensor: shape=(64, 113), dtype=float32, numpy=
 array([[0.0203945 , 0.04035216, 0.03091531, ..., 0.01929114, 0.2819768 ,
         0.        ],
        [0.02040854, 0.04037426, 0.03093406, ..., 0.01930466, 0.28201094,
         0.        ],
        [0.02035316, 0.04028613, 0.03085991, ..., 0.01925156, 0.2818599 ,
         1.        ],
 

In [None]:
# Pack the list of equally shaped batch tensors into one tensor with a new
# leading "batch index" axis.
tensor_gen_data = tf.stack(gen_data, axis=0)
tensor_gen_data

<tf.Tensor: shape=(377, 64, 113), dtype=float32, numpy=
array([[[0.02038999, 0.04034507, 0.03090917, ..., 0.01928696,
         0.28196585, 0.        ],
        [0.02040375, 0.04036696, 0.03092775, ..., 0.01930024,
         0.2819994 , 0.        ],
        [0.02038412, 0.04033513, 0.03090123, ..., 0.01928123,
         0.28193557, 0.        ],
        ...,
        [0.02043637, 0.0404183 , 0.03097099, ..., 0.01933149,
         0.28206384, 0.        ],
        [0.02038673, 0.04033995, 0.03090497, ..., 0.01928386,
         0.28195822, 0.        ],
        [0.02043272, 0.04041271, 0.03096624, ..., 0.019328  ,
         0.28207034, 0.        ]],

       [[0.0203945 , 0.04035216, 0.03091531, ..., 0.01929114,
         0.2819768 , 0.        ],
        [0.02040854, 0.04037426, 0.03093406, ..., 0.01930466,
         0.28201094, 0.        ],
        [0.02035316, 0.04028613, 0.03085991, ..., 0.01925156,
         0.2818599 , 1.        ],
        ...,
        [0.02041775, 0.04038873, 0.03094617, ..., 0.

In [None]:
# Split the generated tensor along its last axis: everything except the final
# two columns, then the second-to-last column, then the last column.
x_gen = tensor_gen_data[..., :-2]
y_gen = tensor_gen_data[..., -2]
a_gen = tensor_gen_data[..., -1]

In [None]:
#x_gen

In [None]:
# Leading axis of x_gen counts the generated batches.
num_batches = x_gen.shape[0]
# Total number of generated rows once the batch axis is flattened away.
new_shape = num_batches*batch_size

In [None]:
# Flatten the per-batch layout so each generated tensor becomes a 2-D
# (new_shape, width) matrix with one row per generated sample.
x_gen, y_gen, a_gen = (
    tf.reshape(part, shape=(new_shape, width))
    for part, width in ((x_gen, xdim), (y_gen, ydim), (a_gen, adim))
)

In [None]:
# Inspect the last generated column after flattening to one row per sample.
a_gen

<tf.Tensor: shape=(24128, 1), dtype=float32, numpy=
array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [1.],
       [0.]], dtype=float32)>

In [None]:
# Undo the batching of the real training data and materialise it as a list of
# per-row (x, y, a) NumPy triples.
unbatched_train = list(train_data.unbatch().as_numpy_iterator())

# Separate the triples into three parallel lists.
x = [row[0] for row in unbatched_train]
y = [row[1] for row in unbatched_train]
a = [row[2] for row in unbatched_train]


In [None]:
# Turn the three per-row lists into tensors.
x, y, a = (tf.convert_to_tensor(column) for column in (x, y, a))

In [None]:
def euclidian_dist(real_data, gen_data, conditional=False):
    '''Euclidean norm of the element-wise difference between two data sets.

    Both inputs are cast to ``tf.double``. When their first dimensions differ,
    the longer input is cut down to the length of the shorter one so the two
    can be subtracted row by row.

    Args:
        real_data: tensor-like holding the real samples.
        gen_data: tensor-like holding the generated samples.
        conditional: accepted for signature compatibility only; this
            definition ignores it. A per-group variant (split the rows by a
            mask and return one distance per group) was sketched here but is
            disabled.

    Returns:
        The result of ``tf.norm(real - gen, ord='euclidean')`` computed on the
        (possibly truncated) double-precision inputs.
    '''
    real = tf.dtypes.cast(real_data, tf.double)
    fake = tf.dtypes.cast(gen_data, tf.double)

    # Align the row counts by truncating whichever input is longer.
    n_real, n_fake = real.shape[0], fake.shape[0]
    if n_real != n_fake:
        shared_rows = min(n_real, n_fake)
        real, fake = real[:shared_rows], fake[:shared_rows]

    difference = real - fake
    return tf.norm(difference, ord='euclidean')

In [None]:
# Column-wise joins of features and labels, reused by the first and third metric.
real_xy = tf.concat((x, y), 1)
fake_xy = tf.concat((x_gen, y_gen), 1)

# Distance over features + labels.
xy = metrics.euclidian_dist(real_xy, fake_xy)
print(xy)

# Distance over features + labels + the a column.
real_xya = tf.concat((x, y, a), 1)
fake_xya = tf.concat((x_gen, y_gen, a_gen), 1)
xya = metrics.euclidian_dist(real_xya, fake_xya)
print(xya)

# Conditional variant: each side is passed as a (data, a) pair and two
# distances come back.
cond_a0, cond_a1 = metrics.euclidian_dist((real_xy, a), (fake_xy, a_gen), conditional=True)
print(cond_a0, cond_a1)

tf.Tensor(364.3536090381353, shape=(), dtype=float64)
tf.Tensor(364.3536090381353, shape=(), dtype=float64)
tf.Tensor(294.5991720803592, shape=(), dtype=float64) tf.Tensor(214.39421687321988, shape=(), dtype=float64)
