# In this notebook we will perform hyperparameter tuning and training of an adversarial anomaly detection model.
#### For more details about this model, refer to the paper at https://arxiv.org/pdf/1905.11034.pdf

In [1]:
# Evaluating `spark` as the first statement starts the Spark application for this
# notebook session and displays the resulting SparkSession object (see the cell output).
spark

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log
45,application_1607211657348_0047,pyspark,idle,Link,Link


SparkSession available as 'spark'.
<pyspark.sql.session.SparkSession object at 0x7f5b7732af10>

## Connect to hsfs and retrieve datasets for training and evaluation 

In [2]:
import hsfs

# Open a connection and obtain the handle for the project's feature store.
connection = hsfs.connection()
fs = connection.get_feature_store()

# Both training datasets are fetched at the same pinned version.
TRAINING_DATASET_VERSION = 1

# ben_td is the training input read by the experiment wrapper below;
# eval_td is the dataset retrieved for evaluation.
ben_td, eval_td = (
    fs.get_training_dataset(td_name, TRAINING_DATASET_VERSION)
    for td_name in ("gan_non_sar_training_df", "gan_eval_df")
)

Connected. Call `.close()` to terminate connection gracefully.

### Define the Hopsworks experiment wrapper function that contains all the training logic.

In [7]:
def experiment_wrapper():
    """Train the GAN anomaly detector and return its generator-loss history.

    This function is the unit of work handed to ``hops.experiment.launch``.
    It reads the notebook-level training dataset handles ``ben_td`` (training
    input) and ``eval_td`` (evaluation input), builds and compiles a
    ``GanAnomalyDetector``, and fits it with TensorBoard and checkpoint
    callbacks that both write to the experiment's TensorBoard log directory.

    All imports are kept inside the function body.
    NOTE(review): presumably this is required because the launcher executes the
    function outside the notebook's interactive namespace - confirm.

    Returns:
        The ``"g_loss"`` entry of the history object returned by ``fit``
        (assuming standard ``tf.keras`` ``History`` semantics, one value per
        Keras epoch).
    """
    import tensorflow as tf
    from model.gan_enc_ano import GanAnomalyDetector, GanAnomalyMonitor
    from hops import tensorboard

    # Set the number of epochs for training.
    # The value is applied through `num_epochs` of the input pipelines below;
    # `fit` itself is called without an explicit `epochs` argument.
    EPOCHS = 2

    train_input = ben_td.tf_data(target_name='target', is_training=True)
    train_input_processed = train_input.tf_record_dataset(process=True, batch_size=16, num_epochs=EPOCHS)

    # Evaluation data is read from the dedicated evaluation dataset (`eval_td`),
    # not from the training dataset.
    # NOTE(review): `is_training=True` is kept as in the original call; confirm
    # whether the evaluation pipeline should use `is_training=False`.
    eval_input = eval_td.tf_data(target_name='target', is_training=True)
    # Only consumed by the (currently disabled) GanAnomalyMonitor callback below.
    eval_input_processed = eval_input.tf_record_dataset(process=True, batch_size=1, num_epochs=EPOCHS)

    # Instantiate the GanAnomalyDetector model.
    gan_anomaly_detector = GanAnomalyDetector(

                # Overall dimensions.
                input_dim=16,
                latent_dim=4,

                # Discriminator settings.
                discriminator_start_n_units=16,
                discriminator_n_layers=2,
                discriminator_activation_fn="sigmoid",
                discriminator_double_neurons=False,
                discriminator_bottleneck_neurons=True,
                discriminator_batch_norm=False,
                discriminator_batch_dropout=False,
                discriminator_dropout_rate=0.0,
                discriminator_learning_rate=0.0002,
                discriminator_extra_steps = 3,

                # Generator settings.
                generator_start_n_units=8,
                generator_n_layers=2,
                generator_activation_fn="tanh",
                generator_double_neurons=True,
                generator_bottleneck_neurons=False,
                generator_batch_norm=False,
                generator_batch_dropout=False,
                generator_dropout_rate=0.0,
                generator_learning_rate=0.0002,

                # Encoder settings.
                encoder_start_n_units=4,
                encoder_n_layers=2,
                encoder_activation_fn="tanh",
                encoder_batch_norm=False,
                encoder_batch_dropout=False,
                encoder_dropout_rate=0.0,
                encoder_learning_rate=0.0002,

    )

    # Compile the WGAN model.
    gan_anomaly_detector.compile()

    # TensorBoard logs and model checkpoints both go to the experiment's
    # TensorBoard log directory.
    callbacks = [
        #GanAnomalyMonitor(batch_size=1, latent_dim=4, input_dim=16, alpha=0.7, real_data=eval_input_processed),
        tf.keras.callbacks.TensorBoard(log_dir=tensorboard.logdir()),
        tf.keras.callbacks.ModelCheckpoint(filepath=tensorboard.logdir()),
    ]

    # Start training the model. `callbacks` is already a list, so it is passed
    # as-is rather than wrapped in another list.
    history = gan_anomaly_detector.fit(train_input_processed, callbacks=callbacks)

    return history.history["g_loss"]


## Use the experiment wrapper function defined above to run the Hops training experiment.

In [8]:
from hops import experiment
# Alternative launcher kept for reference (presumably for distributed training -
# confirm before enabling):
# experiment.collective_all_reduce(experiment_wrapper)

# Run the wrapper as a Hops experiment. As the cell output shows, the call returns
# the experiment's HDFS path together with a dict holding the returned metric and
# the path of the output log.
experiment.launch(experiment_wrapper)

Finished Experiment 

('hdfs://rpc.namenode.service.consul:8020/Projects/amlsim2/Experiments/application_1607211657348_0047_3', {'metric': [-0.3336649537086487], 'log': 'Experiments/application_1607211657348_0047_3/output.log'})