In [1]:
from __future__ import print_function, division

import numpy as np
import matplotlib.pyplot as plt
import warnings
import tensorflow as tf

#Prevent TensorFlow from taking up all of GPU memory
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

warnings.simplefilter('ignore', UserWarning)

from util import *

In [2]:
data_dim = 2
data_size = 10000

X2, X1, y2, y1, columns = generate_lipton(scale=True, num_pts=data_size)
name1 = 'women'
name2 = 'men'

# Kolmogorov–Smirnov test

In [3]:
from scipy.stats import ks_2samp as ks

warnings.simplefilter('ignore', UserWarning)

lambda_l1 = 1e-4
ks_work_exp = []
ks_hair_len = []
fakes = []

for i in range(10):
    print('GAN #{}'.format(i+1))
    D, G, combined = create_gan_small(data_dim, trans_loss_func=squared_l1_loss, trans_loss_wt=lambda_l1)
    train(D, G, combined, X1, X2, name1, name2, plot_progress=False)
    X_fake2 = G.predict(X1)
    fakes.append(X_fake2)
    ks_work_exp.append(ks(X2[:,0], X_fake2[:,0]).statistic)
    ks_hair_len.append(ks(X2[:,1], X_fake2[:,1]).statistic)

ks_work_exp = np.array(ks_work_exp)
ks_hair_len = np.array(ks_hair_len)
print(ks_work_exp, ks_work_exp.mean(), ks_work_exp.std())
print(ks_hair_len, ks_hair_len.mean(), ks_hair_len.std())

GAN #1
GAN #2
GAN #3
GAN #4
GAN #5
GAN #6
GAN #7
GAN #8
GAN #9
GAN #10
[0.0515 0.0461 0.0679 0.0521 0.055  0.0378 0.0492 0.0629 0.0375 0.0641] 0.052410000000000026 0.00990024747165444
[0.0238 0.0092 0.0247 0.0183 0.0366 0.0838 0.045  0.0674 0.0493 0.0729] 0.04310000000000001 0.02386340294258137


# Mean Squared Error

In [4]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as mse

X2_train, X2_test = train_test_split(X2, train_size=0.5, test_size=0.5)
lr_work_exp = LinearRegression().fit(X2_train[:,1:2], X2_train[:,0]) #predict work_exp from hair_len
lr_hair_len = LinearRegression().fit(X2_train[:,0:1], X2_train[:,1]) #predict hair_len from work_exp

print('work_exp MSE on real: {}'.format(mse(X2_test[:,0], lr_hair_len.predict(X2_test[:,1:2]))))
print('hair_len MSE on real: {}'.format(mse(X2_test[:,1], lr_hair_len.predict(X2_test[:,0:1]))))

mse_work_exp = []
mse_hair_len = []
for X_fake2 in fakes:
    mse_work_exp.append(mse(X_fake2[:,0], lr_hair_len.predict(X_fake2[:,1:2])))
    mse_hair_len.append(mse(X_fake2[:,1], lr_hair_len.predict(X_fake2[:,0:1])))
mse_work_exp = np.array(mse_work_exp)
mse_hair_len = np.array(mse_hair_len)
print(mse_work_exp, mse_work_exp.mean(), mse_work_exp.std())
print(mse_hair_len, mse_hair_len.mean(), mse_hair_len.std())

work_exp MSE on real: 2.010543298177521
hair_len MSE on real: 0.3305221132230612
[1.86572853 1.8547777  1.8785402  1.80400211 1.69368272 1.85677528
 1.90254464 1.8043894  1.90357428 1.88197615] 1.8445991016343313 0.06010308303578143
[0.34198437 0.33678067 0.34249321 0.34243129 0.36220409 0.31056555
 0.37430472 0.30450019 0.32173123 0.33662457] 0.3373619886445253 0.02024459886773657
