# In this notebook we perform hyperparameter tuning and training of an adversarial anomaly detection model.
#### For more details about this model, refer to https://arxiv.org/pdf/1905.11034.pdf.

In [5]:
# Evaluate the kernel-provided `spark` name; the recorded output shows it is a SparkSession.
spark

<pyspark.sql.session.SparkSession object at 0x7fe9bd399e90>

## Connect to hsfs and retrieve datasets for training and evaluation 

In [6]:
import hsfs
# Create a connection to the feature store service. It is called with no arguments,
# so whatever defaults the environment provides are used.
connection = hsfs.connection()
# Get the feature store handle for the project's feature store
fs = connection.get_feature_store()

# Training-dataset handles. `ben_td` is read as a notebook-level name inside
# `experiment_wrapper`; `eval_td` is fetched for evaluation (per the section title).
# The second argument is presumably the dataset version - confirm against the hsfs API.
# NOTE(review): `eval_td` is not referenced by any later cell shown in this notebook.
ben_td = fs.get_training_dataset("gan_non_sar_training_df", 1)
eval_td = fs.get_training_dataset("gan_eval_df", 1)

Connected. Call `.close()` to terminate connection gracefully.

In [7]:
from hops import experiment
from hops import hdfs
import json
# Read the stored hyperparameters (a JSON document) through the hops `hdfs` helper.
best_hyperparams_path = "Resources/embeddings_best_hp.json"
best_hyperparams = json.loads(hdfs.load(best_hyperparams_path))

# Wrap every value in a one-element list; consumers index it with [0],
# e.g. args_dict['emb_size'][0].
args_dict = {name: [value] for name, value in best_hyperparams.items()}

### Define the Hopsworks experiment wrapper function, which contains all the training logic.

In [37]:
def experiment_wrapper(
    discriminator_n_layers,
    generator_n_layers
):
    """Train one GanAnomalyDetector configuration and report its generator loss.

    Only the two layer counts are tunable; every other hyperparameter is fixed
    in ``fixed_hparams`` below.

    Relies on notebook-level state defined in earlier cells:
      * ``ben_td``   - training dataset handle used to build the input pipeline.
      * ``args_dict`` - ``args_dict['emb_size'][0]`` is used as the model input
        dimension and as the discriminator's starting number of units.

    Args:
        discriminator_n_layers: number of layers for the discriminator.
        generator_n_layers: number of layers for the generator.

    Returns:
        dict: ``{'metric': <generator loss taken from the fit history>}``.
    """
    # Imported inside the function, presumably so that the import is resolved
    # wherever the trial actually executes - confirm before hoisting it.
    from adversarialaml.gan_enc_ano import GanAnomalyDetector

    # Number of passes over the training data (applied through dataset repetition).
    EPOCHS = 2
    BATCH_SIZE = 16

    # Width of the input vectors; also used to size the discriminator's first layer.
    emb_size = args_dict['emb_size'][0]

    # Hyperparameters that are NOT part of the search.
    fixed_hparams = dict(
        latent_dim=4,

        discriminator_activation_fn="sigmoid",
        discriminator_double_neurons=False,
        discriminator_bottleneck_neurons=True,
        discriminator_batch_norm=False,
        discriminator_batch_dropout=False,
        discriminator_dropout_rate=0.0,
        discriminator_learning_rate=0.0002,
        discriminator_extra_steps=3,

        generator_start_n_units=8,
        generator_activation_fn="tanh",
        generator_double_neurons=True,
        generator_bottleneck_neurons=False,
        generator_batch_norm=False,
        generator_batch_dropout=False,
        generator_dropout_rate=0.0,
        generator_learning_rate=0.0002,

        encoder_start_n_units=4,
        encoder_n_layers=2,
        encoder_activation_fn="tanh",
        encoder_batch_norm=False,
        encoder_batch_dropout=False,
        encoder_dropout_rate=0.0,
        encoder_learning_rate=0.0002,
    )

    # Build the training input pipeline from the training dataset handle.
    train_feed = ben_td.tf_data(target_name='target', is_training=True)
    train_dataset = train_feed.tf_record_dataset(
        process=True, batch_size=BATCH_SIZE, num_epochs=EPOCHS
    )
    # NOTE(review): no evaluation pass is run here; the reported metric comes from
    # the training history only. If evaluation is added, build its input from
    # `eval_td` rather than `ben_td`.

    # Instantiate the GanAnomalyDetector model.
    gan_anomaly_detector = GanAnomalyDetector(
        input_dim=emb_size,
        discriminator_start_n_units=emb_size,
        discriminator_n_layers=discriminator_n_layers,
        generator_n_layers=generator_n_layers,
        **fixed_hparams
    )

    # Compile the WGAN model.
    gan_anomaly_detector.compile()

    history = gan_anomaly_detector.fit(train_dataset)

    # Report the last recorded generator loss. fit() is called without an explicit
    # epoch count, so the history presumably holds a single entry and this equals
    # the first one; indexing with -1 stays correct if more epochs are recorded.
    return {'metric': history.history["g_loss"][-1]}

## Define the search space: the hyperparameters to tune and their value ranges

In [38]:
from maggy import Searchspace
# Both tunable arguments of `experiment_wrapper` are declared as INTEGER
# hyperparameters with the bounds list [2, 3].
sp = Searchspace(
    discriminator_n_layers=('INTEGER', [2, 3]),
    generator_n_layers=('INTEGER', [2, 3]),
)

Hyperparameter added: discriminator_n_layers
Hyperparameter added: generator_n_layers

## Use the experiment wrapper function above to run the hyperparameter search with Maggy.

In [39]:
from maggy import experiment
# Run a random search over `sp`, calling `experiment_wrapper` for each trial.
# direction='min': the metric returned by the wrapper (generator loss) is minimized.
# hb_interval / es_interval / es_min are presumably the heartbeat and
# early-stopping settings - confirm against the Maggy documentation.
result = experiment.lagom(
    experiment_wrapper,
    searchspace=sp,
    optimizer='randomsearch',
    direction='min',
    num_trials=2,
    name='ganaml',
    hb_interval=5,
    es_interval=5,
    es_min=5,
)

WARN: Can't reach Maggy server. No progress information and logs available. Job continues running anyway.
Started Maggy Experiment: ganaml, application_1607799579196_0013, run 10

------ RandomSearch Results ------ direction(min) 
BEST combination {"discriminator_n_layers": 3, "generator_n_layers": 3} -- metric -0.988970160484314
WORST combination {"discriminator_n_layers": 3, "generator_n_layers": 2} -- metric -0.37913262844085693
AVERAGE metric -- -0.6840513944625854
EARLY STOPPED Trials -- 0
Total job time 0 hours, 1 minutes, 39 seconds

Finished Experiment

In [None]:
                # TODO(review): leftover scratch list of hyperparameter names from an
                # unexecuted cell. The original fragment was not valid Python (dangling
                # `=` signs, no enclosing call), so it is kept here as comments only.
                # Presumably these are candidates to expose as tunable parameters -
                # confirm, then either wire them into the search or delete this cell.
                #
                # latent_dim,
                # discriminator_n_layers,
                # discriminator_activation_fn,
                # discriminator_double_neurons,
                # discriminator_bottleneck_neurons,
                # discriminator_batch_norm,
                # discriminator_batch_dropout,
                # discriminator_dropout_rate,
                # discriminator_learning_rate,
                # discriminator_extra_steps,

                # generator_start_n_units,
                # generator_n_layers,
                # generator_activation_fn,
                # generator_double_neurons,
                # generator_bottleneck_neurons,
                # generator_batch_norm,
                # generator_batch_dropout,
                # generator_dropout_rate,
                # generator_learning_rate,

                # encoder_start_n_units,
                # encoder_n_layers,
                # encoder_activation_fn,
                # encoder_batch_norm,
                # encoder_batch_dropout,
                # encoder_dropout_rate,
                # encoder_learning_rate
