# In this notebook we will perform hyperparameter tuning and training of an adversarial anomaly detection model.
#### For more details about this model refer to https://arxiv.org/pdf/1905.11034.pdf.

In [1]:
# Display the SparkSession that the PySpark kernel exposes as `spark`.
spark

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log
17,application_1607949680860_0018,pyspark,idle,Link,Link


SparkSession available as 'spark'.
<pyspark.sql.session.SparkSession object at 0x7fb98d4f0f90>

## Connect to hsfs and retrieve datasets for training and evaluation 

In [2]:
import hsfs

# Open a connection to the feature store service.
connection = hsfs.connection()

# Handle to the feature store that belongs to the current project.
fs = connection.get_feature_store()

# Version 1 of the two training datasets used later in this notebook.
ben_td = fs.get_training_dataset(name="gan_non_sar_training_df", version=1)
eval_td = fs.get_training_dataset(name="gan_eval_df", version=1)

Connected. Call `.close()` to terminate connection gracefully.

In [3]:
from hops import experiment
from hops import hdfs
import json
import tensorflow as tf
# Hyperparameters stored in "Resources/embeddings_best_hp.json": every value is
# wrapped in a single-element list, so consumers read it as emb_args_dict[name][0].
emb_best_hyperparams_path = "Resources/embeddings_best_hp.json"
emb_best_hyperparams = json.loads(hdfs.load(emb_best_hyperparams_path))
emb_args_dict = {name: [value] for name, value in emb_best_hyperparams.items()}

# Same treatment for the hyperparameters stored in "Resources/gan_best_hp.json".
best_hyperparams_path = "Resources/gan_best_hp.json"
best_hyperparams = json.loads(hdfs.load(best_hyperparams_path))
args_dict = {name: [value] for name, value in best_hyperparams.items()}
    

### Define the Hopsworks experiment wrapper function that contains all the training logic.

In [4]:
def experiment_wrapper():
    """Train the GAN anomaly detector with the stored hyperparameters and export it.

    The function takes no arguments; it reads the notebook-level ``args_dict``,
    ``emb_args_dict``, ``ben_td`` and ``eval_td`` objects. It builds the input
    pipelines, instantiates and fits a ``GanAnomalyDetector``, saves it with
    ``tf.saved_model.save`` and exports the saved model through ``hops.model``
    under the name ``ganAml``.

    Returns:
        dict: ``{'loss': <first entry of the recorded "g_loss" history>}``; the
        same dict is passed as ``metrics`` when the model is exported.
    """
    import os
    import uuid

    import tensorflow as tf
    from adversarialaml.gan_enc_ano import GanAnomalyDetector, GanAnomalyMonitor
    from hops import tensorboard
    from hops import model as hops_model

    def _hp(name):
        # Every hyperparameter is stored as a single-element list in args_dict.
        return args_dict[name][0]

    latent_dim = _hp('latent_dim')

    discriminator_n_layers = _hp('discriminator_n_layers')
    discriminator_dropout_rate = _hp('discriminator_dropout_rate')
    discriminator_learning_rate = _hp('discriminator_learning_rate')
    discriminator_extra_steps = _hp('discriminator_extra_steps')

    generator_start_n_units = _hp('generator_start_n_units')
    generator_n_layers = _hp('generator_n_layers')
    generator_dropout_rate = _hp('generator_dropout_rate')
    generator_learning_rate = _hp('generator_learning_rate')

    encoder_start_n_units = _hp('encoder_start_n_units')
    encoder_n_layers = _hp('encoder_n_layers')
    encoder_dropout_rate = _hp('encoder_dropout_rate')
    encoder_learning_rate = _hp('encoder_learning_rate')

    # Batch-norm switches are stored as numbers: 0 disables, anything else enables.
    discriminator_batch_norm = bool(_hp('discriminator_batch_norm') != 0)
    generator_batch_norm = bool(_hp('generator_batch_norm') != 0)
    encoder_batch_norm = bool(_hp('encoder_batch_norm') != 0)

    # Activation functions are stored as numbers: 0 selects "tanh", anything else "relu".
    generator_activation_fn = "tanh" if _hp('generator_activation_fn') == 0 else "relu"
    encoder_activation_fn = "tanh" if _hp('encoder_activation_fn') == 0 else "relu"

    # Dropout is switched on for a sub-network only when its own rate is positive.
    discriminator_batch_dropout = bool(discriminator_dropout_rate > 0.0)
    # Fix: this flag used to be derived from discriminator_dropout_rate (copy-paste bug).
    generator_batch_dropout = bool(generator_dropout_rate > 0.0)
    encoder_batch_dropout = bool(encoder_dropout_rate > 0.0)

    # Set the number of epochs for training.
    EPOCHS = 2

    train_input = ben_td.tf_data(target_name='target', is_training=True)
    train_input_processed = train_input.tf_record_dataset(process=True, batch_size=16, num_epochs=EPOCHS)
    # Fix: the evaluation pipeline used to read ben_td (the training data) a second time.
    # NOTE(review): it is only consumed by the disabled GanAnomalyMonitor callback below;
    # confirm that is_training=True is intended for the evaluation dataset.
    eval_input = eval_td.tf_data(target_name='target', is_training=True)
    eval_input_processed = eval_input.tf_record_dataset(process=True, batch_size=1, num_epochs=EPOCHS)

    discriminator_double_neurons = False
    discriminator_bottleneck_neurons = True
    generator_double_neurons = True
    generator_bottleneck_neurons = False

    # The embedding size is used both as the input dimension and as the width of the
    # first discriminator layer.
    emb_size = emb_args_dict['emb_size'][0]

    # Instantiate the GanAnomalyDetector model.
    gan_anomaly_detector = GanAnomalyDetector(
        input_dim=emb_size,
        latent_dim=latent_dim,

        discriminator_start_n_units=emb_size,
        discriminator_n_layers=discriminator_n_layers,
        discriminator_activation_fn="sigmoid",
        discriminator_double_neurons=discriminator_double_neurons,
        discriminator_bottleneck_neurons=discriminator_bottleneck_neurons,
        discriminator_batch_norm=discriminator_batch_norm,
        discriminator_batch_dropout=discriminator_batch_dropout,
        discriminator_dropout_rate=discriminator_dropout_rate,
        discriminator_learning_rate=discriminator_learning_rate,
        discriminator_extra_steps=discriminator_extra_steps,

        generator_start_n_units=generator_start_n_units,
        generator_n_layers=generator_n_layers,
        generator_activation_fn=generator_activation_fn,
        generator_double_neurons=generator_double_neurons,
        generator_bottleneck_neurons=generator_bottleneck_neurons,
        generator_batch_norm=generator_batch_norm,
        generator_batch_dropout=generator_batch_dropout,
        generator_dropout_rate=generator_dropout_rate,
        generator_learning_rate=generator_learning_rate,

        encoder_start_n_units=encoder_start_n_units,
        encoder_n_layers=encoder_n_layers,
        encoder_activation_fn=encoder_activation_fn,
        encoder_batch_norm=encoder_batch_norm,
        encoder_batch_dropout=encoder_batch_dropout,
        encoder_dropout_rate=encoder_dropout_rate,
        encoder_learning_rate=encoder_learning_rate,
    )

    # Compile the WGAN model.
    gan_anomaly_detector.compile()

    callbacks = [
        #GanAnomalyMonitor(batch_size=1, latent_dim=4, input_dim=16, alpha=0.7, real_data=eval_input_processed),
        tf.keras.callbacks.TensorBoard(log_dir=tensorboard.logdir()),
        tf.keras.callbacks.ModelCheckpoint(filepath=tensorboard.logdir()),
    ]
    # Start training the model.
    # Fix: pass the callback list itself rather than a list wrapping that list.
    history = gan_anomaly_detector.fit(train_input_processed, callbacks=callbacks)

    metrics = {'loss': history.history["g_loss"][0]}

    # save to the model registry
    export_path = os.getcwd() + '/model-' + str(uuid.uuid4())
    print('Exporting trained model to: {}'.format(export_path))

    # module = ServingModule(gan_anomaly_detector)
    # tf.saved_model.save(module, export_path, signatures={ "score": module.score})

    # NOTE(review): the signature is traced for a float32 input of shape [None];
    # confirm this matches the input shape the model's __call__ expects.
    call = gan_anomaly_detector.__call__.get_concrete_function(x=tf.TensorSpec([None], tf.float32))
    tf.saved_model.save(gan_anomaly_detector, export_path, signatures=call)

    print('Done exporting!')

    hops_model.export(export_path, 'ganAml', metrics=metrics)

    return metrics

## Use the experiment wrapper function above to launch the Hops training experiment.

In [5]:
from hops import experiment
    
# Launch the wrapper as a Hops experiment; 'loss' is the key of the metrics dict
# returned by experiment_wrapper.
experiment.launch(
    experiment_wrapper,
    name='train_gan_aml',
    metric_key='loss',
    local_logdir=False,
)

An error was encountered:
An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3, hopsworksdavit-worker-1.internal.cloudapp.net, executor 1): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/srv/hops/spark/python/lib/pyspark.zip/pyspark/worker.py", line 377, in main
    process()
  File "/srv/hops/spark/python/lib/pyspark.zip/pyspark/worker.py", line 372, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/srv/hops/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 2499, in pipeline_func
  File "/srv/hops/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 2499, in pipeline_func
  File "/srv/hops/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 2499, in pipeline_func
  [Previous line repeated 1 more time]
  File "/srv/hops

In [None]:
#https://github.com/tensorflow/tensorflow/issues/25235
#https://stackoverflow.com/questions/59142040/tensorflow-2-0-how-to-change-the-output-signature-while-using-tf-saved-model

In [12]:
import tensorflow as tf

def anomaly_score(x):
    """Return ``x`` wrapped in a new single-element list."""
    wrapped = [x]
    return wrapped

@tf.function
def predict_anomaly_score(x):
    """Return the result of ``anomaly_score(x)`` under the key ``'embeddings'``.

    Args:
        x: Value forwarded unchanged to ``anomaly_score``.

    Returns:
        dict: ``{'embeddings': anomaly_score(x)}``.
    """
    # The previous version also built an ``inputs`` dict that was never read;
    # it has been dropped because it had no effect on the result.
    return {
        'embeddings': anomaly_score(x)
    }


In [13]:
# Scratch check: display the TensorSpec class used to build the signature in the next cell.
tf.TensorSpec

<class 'tensorflow.python.framework.tensor_spec.TensorSpec'>

In [14]:
# Trace predict_anomaly_score for a float32 input of shape [None]; the result is a ConcreteFunction.
predict_anomaly_score.get_concrete_function(x=tf.TensorSpec(shape=[None], dtype=tf.float32))

<ConcreteFunction predict_anomaly_score(x) at 0x7FF9054977D0>

In [4]:
# Leftover scratch expression; evaluates to 0.
1-1

0

In [None]:
# Reference snippet kept as an inert string literal (it is never executed): a template
# tf.saved_model.save call that passes an explicit concrete-function signature.
"""
tf.saved_model.save(
    module, 
    export_dir, 
    signatures=my_module_encoder.get_concrete_function(
        text=tf.TensorSpec(shape=None, dtype=tf.string)
    ), 
    options=None
)
"""