In [None]:
# reloading the libraries and setting the parameters

import GAN_171103
import importlib
importlib.reload(GAN_171103) # For reloading after making changes
from GAN_171103 import *

import numpy as np
import pandas as pd
from keras import layers
from keras import models

In [None]:
# Datasets available under data/; `pos` picks the one used by the cells below.
paths = [
    'cars_dummies',
    'mammographic_masses',
    'nursery',
    'mushroom',
    'winequality-white',
]
path = 'data/{}.csv'
pos = 2  # index into `paths` -> 'nursery'
df = pd.read_csv(path.format(paths[pos]))

# The last column is treated as the label by the following cells; show its name.
print(df.columns[-1])

In [None]:

# --- Network size -------------------------------------------------------------
rand_dim = 32       # length of the generator's noise vector; needs to be ~data_dim
base_n_count = 128  # base layer width handed to the model builders

# --- Training schedule ----------------------------------------------------------
nb_steps = 500 + 1  # one extra step so the last interval is logged
batch_size = 128

k_d = 1                       # critic network updates per adversarial training step
k_g = 1                       # generator network updates per adversarial training step
critic_pre_train_steps = 100  # critic-only steps before adversarial training starts
log_interval = 100            # steps between loss summaries / saved sample plots
learning_rate = 5e-4

# --- Output location and optional saved-state paths -----------------------------
data_dir = 'cache/' + paths[pos] + '/'
generator_model_path = None
discriminator_model_path = None
loss_pickle_path = None

show = True

# --- Training frame -------------------------------------------------------------
train = df.copy().reset_index(drop=True)

# The last column of the CSV is the label; every other column is a feature.
label_cols = [df.columns[-1]]
data_cols = [col for col in train.columns if col not in label_cols]
train_no_label = train[data_cols]

In [None]:
# Train the vanilla GAN on the unlabeled feature columns.

# Local overrides of two settings (currently the same values as the settings cell).
k_d = 1
learning_rate = 5e-4

# adversarial_training_GAN receives its settings as one positional list,
# so the order of the entries below must not change.
arguments = [
    rand_dim,
    nb_steps,
    batch_size,
    k_d,
    k_g,
    critic_pre_train_steps,
    log_interval,
    learning_rate,
    base_n_count,
    data_dir,
    generator_model_path,
    discriminator_model_path,
    loss_pickle_path,
    show,
]

adversarial_training_GAN(arguments, train_no_label, data_cols)

In [None]:
# Let's look at some of the generated data
# First create the networks locally and load the weights

import GAN_171103
import importlib
importlib.reload(GAN_171103) # For reloading after making changes
from GAN_171103 import *

seed = 17

# Rebuild the training frame from the loaded CSV (fresh copy, clean 0..n-1 index).
train = df.copy().reset_index(drop=True)

# Last column is the label; the remaining columns are the features.
label_cols = [train.columns[-1]]
data_cols = [col for col in train.columns if col not in label_cols]
train_no_label = train[data_cols]

data_dim = len(data_cols)
label_dim = len(label_cols)
with_class = False  # generator is fed noise only (no label input)
np.random.seed(seed)

# Define the network models, then restore the generator weights saved at step 500.
generator_model, discriminator_model, combined_model = define_models_GAN(
    rand_dim, data_dim, base_n_count
)
generator_model.load_weights(
    'cache/' + paths[pos] + '/GAN_generator_model_weights_step_500.h5'
)

In [None]:

# Sample a batch of real rows and generate an equally sized synthetic batch.

# NOTE(review): 492 looks carried over from an earlier fraud-detection notebook;
# confirm it is the intended sample count for this dataset.
test_size = 492

# get_data_batch comes from the GAN_171103 star import; presumably it returns a
# 2-D array of sampled rows (the label slice below indexes it that way) - verify.
x = get_data_batch(train_no_label, test_size, seed=3)
z = np.random.normal(size=(test_size, rand_dim))

if not with_class:
    # Unconditional generator: noise is the only input.
    g_z = generator_model.predict(z)
else:
    # Conditional generator: the trailing label columns of the real batch are
    # passed alongside the noise.
    labels = x[:, -label_dim:]
    g_z = generator_model.predict([z, labels])

In [None]:
# =============================================================================
# check data
# =============================================================================
# Take the magnitude of every generated value, round it to the nearest whole
# number, label the columns with the feature names and store as integers.
df2 = pd.DataFrame(
    np.rint(np.abs(g_z)),
    columns=df.columns[:-1],
).astype(int)

In [None]:
# =============================================================================
#  rebuild the neural network
# =============================================================================

import numpy as np
import pandas as pd
from keras import layers
from keras import models

def critic_network(x, data_dim, base_n_count):
    """Stack the critic's dense layers on top of the tensor ``x``.

    Three ReLU layers of width 4x, 2x and 1x ``base_n_count`` are followed by
    a single output unit with no activation.

    Args:
        x: input tensor the layers are applied to.
        data_dim: not used by this builder; kept so all network builders
            share one signature.
        base_n_count: base width from which the hidden layer sizes derive.

    Returns:
        The output tensor of the final one-unit dense layer.
    """
    for multiplier in (4, 2, 1):
        x = layers.Dense(base_n_count * multiplier, activation='relu')(x)
    # Linear (unbounded) score rather than a sigmoid probability.
    return layers.Dense(1)(x)

def generator_network(x, data_dim, base_n_count):
    """Stack the generator's dense layers on top of the tensor ``x``.

    Three ReLU layers of growing width (1x, 2x, 4x ``base_n_count``) are
    followed by a dense layer with ``data_dim`` units and no activation.

    Args:
        x: input tensor the layers are applied to.
        data_dim: number of units in the output layer.
        base_n_count: base width from which the hidden layer sizes derive.

    Returns:
        The output tensor of the final ``data_dim``-unit dense layer.
    """
    hidden_widths = (base_n_count, base_n_count * 2, base_n_count * 4)
    for width in hidden_widths:
        x = layers.Dense(width, activation='relu')(x)
    return layers.Dense(data_dim)(x)

def discriminator_network(x, data_dim, base_n_count):
    """Stack the discriminator's dense layers on top of the tensor ``x``.

    Three ReLU layers of shrinking width (4x, 2x, 1x ``base_n_count``) are
    followed by a single sigmoid unit.

    Args:
        x: input tensor the layers are applied to.
        data_dim: not used by this builder; kept so all network builders
            share one signature.
        base_n_count: base width from which the hidden layer sizes derive.

    Returns:
        The output tensor of the final one-unit sigmoid layer.
    """
    hidden_widths = (base_n_count * 4, base_n_count * 2, base_n_count)
    for width in hidden_widths:
        x = layers.Dense(width, activation='relu')(x)
    return layers.Dense(1, activation='sigmoid')(x)

def define_models_GAN(rand_dim, data_dim, base_n_count, type=None):
    """Build the generator, the discriminator and the stacked combined model.

    Args:
        rand_dim: length of the noise vector fed to the generator.
        data_dim: number of values in one (real or generated) data row.
        base_n_count: base layer width forwarded to the network builders.
        type: pass ``'Wasserstein'`` to build the scoring network with
            ``critic_network``; any other value uses ``discriminator_network``.

    Returns:
        Tuple ``(generator_model, discriminator_model, combined_model)``. The
        combined model maps the noise input through the generator and then
        through the discriminator.
    """
    # Generator graph: noise vector -> generated row.
    noise_in = layers.Input(shape=(rand_dim,))
    generated_row = generator_network(noise_in, data_dim, base_n_count)

    # Scoring graph: a data row (real or generated) -> single output.
    row_in = layers.Input(shape=(data_dim,))
    build_scorer = critic_network if type == 'Wasserstein' else discriminator_network
    score = build_scorer(row_in, data_dim, base_n_count)

    generator_model = models.Model(inputs=[noise_in], outputs=[generated_row], name='generator')
    discriminator_model = models.Model(inputs=[row_in], outputs=[score], name='discriminator')

    # Stack the two models so the discriminator scores the generator's output.
    combined_model = models.Model(
        inputs=[noise_in],
        outputs=[discriminator_model(generator_model(noise_in))],
        name='combined',
    )

    return generator_model, discriminator_model, combined_model

In [None]:
    
# Switch to another dataset and point at its saved generator weights.
# The network sizes (rand_dim, base_n_count, data_dim) are not set here.
paths = [
    'cars_dummies',
    'mammographic_masses',
    'nursery',
    'mushroom',
    'winequality-white',
]
path = 'data/{}.csv'
pos = 3  # index into `paths` -> 'mushroom'
df = pd.read_csv(path.format(paths[pos]))

n_samples = 10
Xdf = df.iloc[:, :-1]  # every column except the last one
path_model = 'cache/' + paths[pos] + '/GAN_generator_model_weights_step_500.h5'

In [None]:
def generate_data_gan(n_samples, Xdf, path_model, rand_dim=32, base_n_count=128):
    """Generate synthetic rows with a saved GAN generator.

    The generator is rebuilt with ``define_models_GAN``, its weights are loaded
    from ``path_model``, and ``n_samples`` noise vectors are pushed through it.
    The absolute value of every generated number is taken; columns whose dtype
    in ``Xdf`` is an integer or boolean type are additionally rounded to the
    nearest integer. Every column of ``Xdf`` appears in the result.

    Args:
        n_samples: number of synthetic rows to generate.
        Xdf: feature DataFrame whose column names, order and dtypes define the
            layout of the output (one generator output per column).
        path_model: path to the saved generator weights file.
        rand_dim: length of the noise vector; must match the trained generator.
        base_n_count: base layer width; must match the trained generator.

    Returns:
        DataFrame with ``n_samples`` rows and the same columns as ``Xdf``.
    """
    generator_model, _, _ = define_models_GAN(
        rand_dim, data_dim=len(Xdf.columns), base_n_count=base_n_count
    )
    generator_model.load_weights(path_model)

    z = np.random.normal(size=(n_samples, rand_dim))
    magnitudes = np.abs(generator_model.predict(z))

    generated = {}
    for i, (col, dtype) in enumerate(zip(Xdf.columns, Xdf.dtypes)):
        # Classify by dtype family instead of exact dtype names so that columns
        # such as uint8, int16, bool or float32 are not silently left out.
        if pd.api.types.is_integer_dtype(dtype) or pd.api.types.is_bool_dtype(dtype):
            generated[col] = np.rint(magnitudes[:, i]).astype(int)
        else:
            generated[col] = magnitudes[:, i]

    # Build the frame in one go rather than inserting column by column.
    return pd.DataFrame(generated)

In [None]:
   
# Draw n_samples synthetic rows shaped like Xdf from the saved generator.
data = generate_data_gan(
    n_samples,
    Xdf,
    path_model,
    rand_dim=32,
)