# In this notebook we will perform training of adversarial anomaly detection model. 
#### For more details about this model refer to https://arxiv.org/pdf/1905.11034.pdf.

In [1]:
spark

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log
75,application_1613059669625_0001,pyspark,idle,Link,Link


SparkSession available as 'spark'.
<pyspark.sql.session.SparkSession object at 0x7f11d408da50>

## Connect to hsfs and retrieve datasets for training and evaluation 

In [2]:
import hsfs
# Create a connection
connection = hsfs.connection()
# Get the feature store handle for the project's feature store
fs = connection.get_feature_store()

ben_td = fs.get_training_dataset("gan_non_sar_training_df", 1)
eval_td = fs.get_training_dataset("gan_eval_df", 1)

Connected. Call `.close()` to terminate connection gracefully.

In [3]:
#ben_td.read().count()

In [4]:
from hops import experiment
from hops import hdfs
import json
import tensorflow as tf

emb_best_hyperparams_path = "Resources/embeddings_best_hp.json"
emb_best_hyperparams = json.loads(hdfs.load(emb_best_hyperparams_path))
emb_args_dict = {}
for key in emb_best_hyperparams.keys():
    emb_args_dict[key] = [emb_best_hyperparams[key]]

    
best_hyperparams_path = "Resources/gan_best_hp.json"
best_hyperparams = json.loads(hdfs.load(best_hyperparams_path))
args_dict = {}
for key in best_hyperparams.keys():
    args_dict[key] = [best_hyperparams[key]]

### Define hopsworks experiments wrapper function and put all the training logic there. 

In [18]:
def experiment_wrapper():
    
    import os
    import sys
    import uuid
    import random    
    
    import tensorflow as tf
    from adversarialaml import keras_utils
    from adversarialaml.gan_enc_ano import GanAnomalyDetector,  GanAnomalyMonitor 
    from hops import tensorboard
    from hops import model as hops_model
        
    latent_dim = args_dict['latent_dim'][0]
    discriminator_n_layers = args_dict['discriminator_n_layers'][0]
    discriminator_batch_norm = args_dict['discriminator_batch_norm'][0]
    discriminator_dropout_rate = args_dict['discriminator_dropout_rate'][0]
    discriminator_learning_rate = args_dict['discriminator_learning_rate'][0]
    discriminator_extra_steps = args_dict['discriminator_extra_steps'][0]

    generator_start_n_units = args_dict['generator_start_n_units'][0]
    generator_n_layers = args_dict['generator_n_layers'][0]
    generator_activation_fn = args_dict['generator_activation_fn'][0]
    generator_batch_norm = args_dict['generator_batch_norm'][0]
    generator_dropout_rate = args_dict['generator_dropout_rate'][0] 
    generator_learning_rate = args_dict['generator_learning_rate'][0]

    encoder_start_n_units = args_dict['encoder_start_n_units'][0] 
    encoder_n_layers = args_dict['encoder_n_layers'][0]
    encoder_activation_fn = args_dict['encoder_activation_fn'][0]
    encoder_batch_norm = args_dict['encoder_batch_norm'][0]
    encoder_dropout_rate = args_dict['encoder_dropout_rate'][0] 
    encoder_learning_rate = args_dict['encoder_learning_rate'][0]
    
    discriminator_activation_fn = args_dict['discriminator_activation_fn'][0]

    discriminator_middle_layer_activation_fn = args_dict['discriminator_middle_layer_activation_fn'][0]    
    generator_middle_layer_activation_fn = args_dict['generator_middle_layer_activation_fn'][0]  
    encoder_middle_layer_activation_fn = args_dict['encoder_middle_layer_activation_fn'][0]  
    
    int_to_act_fn = {
        1: 'linear',        
        2: 'relu',
        3: 'leaky_relu',
        4: 'selu',
        5: 'tanh'
    }    

    # Define distribution strategy
    strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")

    options = tf.data.Options()
    options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
    
    EPOCHS = 2
    # Define per device batch size
    batch_size_per_replica = 32
    # Define global batch size
    BATCH_SIZE = batch_size_per_replica * strategy.num_replicas_in_sync
    TOTAL_SAMPLES = 6366
    STEPS_PER_EPOCH=TOTAL_SAMPLES//BATCH_SIZE

    train_input = ben_td.tf_data(target_name='target', is_training=True)
    train_input_processed = train_input.tf_record_dataset(process=True, batch_size=BATCH_SIZE, num_epochs=EPOCHS)
    train_input_processed  = train_input_processed.with_options(options)

    eval_input = eval_td.tf_data(target_name='target', is_training=True)
    eval_input_processed = eval_input.tf_record_dataset(process=True, batch_size=1, num_epochs=EPOCHS)    
    eval_input_processed  = eval_input_processed.with_options(options)
        
    discriminator_activation_fn=int_to_act_fn[discriminator_activation_fn]
    discriminator_middle_layer_activation_fn=int_to_act_fn[discriminator_middle_layer_activation_fn]
    
    if discriminator_dropout_rate > 0.0:
        discriminator_batch_dropout = True
    else:
        discriminator_batch_dropout = False
    

    if discriminator_dropout_rate > 0.0:
        generator_batch_dropout=True
    else:
        generator_batch_dropout=False

    if encoder_dropout_rate > 0.0:
        encoder_batch_dropout=True
    else:
        encoder_batch_dropout=False   


    if discriminator_batch_norm==0:
        discriminator_batch_norm = False
    else:
        discriminator_batch_norm = True

    generator_activation_fn=int_to_act_fn[generator_activation_fn]
    generator_middle_layer_activation_fn=int_to_act_fn[generator_middle_layer_activation_fn]

    if generator_batch_norm==0:
        generator_batch_norm = False
    else:
        generator_batch_norm = True

    encoder_activation_fn=int_to_act_fn[encoder_activation_fn]
    encoder_middle_layer_activation_fn=int_to_act_fn[encoder_middle_layer_activation_fn]
                
    if encoder_batch_norm==0:
        encoder_batch_norm=False
    else:
        encoder_batch_norm=True        
        
    discriminator_double_neurons=False
    discriminator_bottleneck_neurons=True
    generator_double_neurons=True
    generator_bottleneck_neurons=False
    
    # construct model under distribution strategy scope
    with strategy.scope():    
        # Instantiate the GanAnomalyDetector model.
        gan_anomaly_detector = GanAnomalyDetector(
                    input_dim=emb_args_dict['emb_size'][0],
                    latent_dim=latent_dim,

                    discriminator_start_n_units=emb_args_dict['emb_size'][0],
                    discriminator_n_layers=discriminator_n_layers,
                    discriminator_activation_fn=discriminator_activation_fn,
                    discriminator_middle_layer_activation_fn=discriminator_middle_layer_activation_fn,
                    discriminator_double_neurons=discriminator_double_neurons,
                    discriminator_bottleneck_neurons=discriminator_bottleneck_neurons,
                    discriminator_batch_norm=discriminator_batch_norm,
                    discriminator_batch_dropout=discriminator_batch_dropout,
                    discriminator_dropout_rate=discriminator_dropout_rate,
                    discriminator_learning_rate=discriminator_learning_rate,
                    discriminator_extra_steps=discriminator_extra_steps,

                    generator_start_n_units=generator_start_n_units,
                    generator_n_layers=generator_n_layers,
                    generator_activation_fn=generator_activation_fn,
                    generator_middle_layer_activation_fn=generator_middle_layer_activation_fn,        
                    generator_double_neurons=generator_double_neurons,
                    generator_bottleneck_neurons=generator_bottleneck_neurons,
                    generator_batch_norm=generator_batch_norm,
                    generator_batch_dropout=generator_batch_dropout,
                    generator_dropout_rate=generator_dropout_rate,
                    generator_learning_rate=generator_learning_rate,

                    encoder_start_n_units=encoder_start_n_units,
                    encoder_n_layers=encoder_n_layers,
                    encoder_activation_fn=encoder_activation_fn,
                    encoder_middle_layer_activation_fn=encoder_middle_layer_activation_fn,        
                    encoder_batch_norm=encoder_batch_norm,
                    encoder_batch_dropout=encoder_batch_dropout,
                    encoder_dropout_rate=encoder_dropout_rate,
                    encoder_learning_rate=encoder_learning_rate,
        )
    

        # Compile the WGAN model.
        gan_anomaly_detector.compile()

    callbacks = [
        GanAnomalyMonitor(batch_size=1, latent_dim=latent_dim, input_dim=emb_args_dict['emb_size'][0]),
        keras_utils.TimeHistory(BATCH_SIZE,2, logdir=tensorboard.logdir()),
        tf.keras.callbacks.TensorBoard(log_dir=tensorboard.logdir()),
        tf.keras.callbacks.ModelCheckpoint(filepath=tensorboard.logdir()),
    ]
    
    # Start training the model.
    history = gan_anomaly_detector.fit(train_input_processed, 
                                       callbacks=[callbacks], 
                                       epochs=EPOCHS, 
                                       steps_per_epoch= STEPS_PER_EPOCH)

    metrics={'loss': history.history["g_loss"][0]} 

    # save to the model registry
    export_path = os.getcwd() + '/model-' + str(uuid.uuid4())
    print('Exporting trained model to: {}'.format(export_path))
    
    call = gan_anomaly_detector.serve_function.get_concrete_function(tf.TensorSpec([None,None], tf.float32))
    tf.saved_model.save(gan_anomaly_detector, export_path, signatures=call)

    print('Done exporting!')
        
    hops_model.export(export_path, 'ganAml', metrics=metrics)
    
    return metrics

## Use above experiments wrapper function to conduct hops training experiments.

In [19]:
from hops import experiment
    
experiment.launch(experiment_wrapper,  name='train_gan_aml', metric_key='loss', local_logdir=False)

Finished Experiment 

('hdfs://rpc.namenode.service.consul:8020/Projects/amldemo/Experiments/application_1613059669625_0001_6', {'loss': -0.009175586514174938, 'log': 'Experiments/application_1613059669625_0001_6/output.log'})