# In this notebook we perform hyperparameter tuning and training of an adversarial anomaly detection model.
#### For more details about this model, refer to https://arxiv.org/pdf/1905.11034.pdf.

In [1]:
# Evaluating `spark` displays the SparkSession object; per the cell output,
# this first evaluation is also when the Spark application is started.
spark

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log
65,application_1612947411090_0010,pyspark,idle,Link,Link


SparkSession available as 'spark'.
<pyspark.sql.session.SparkSession object at 0x7f7d3c0539d0>

## Connect to hsfs and retrieve datasets for training and evaluation 

In [2]:
import hsfs

# Open a connection to the feature store service and grab the handle of the
# project's own feature store.
connection = hsfs.connection()
fs = connection.get_feature_store()

# Version 1 of the two training datasets: `ben_td` is used to fit the model,
# `test_td` is used as validation data inside `experiment_wrapper`.
ben_td = fs.get_training_dataset(name="ben_td", version=1)
eval_td = fs.get_training_dataset(name="test_td", version=1)

Connected. Call `.close()` to terminate connection gracefully.

In [3]:
# Number of rows in the training dataset. The value printed below (58539) is
# the same number that is hard-coded as TOTAL_SAMPLES in `experiment_wrapper`.
ben_td.read().count()

58539

### Define the Hopsworks experiment wrapper function, which contains all of the training logic.

In [4]:
def experiment_wrapper(
    a, b, c, d, e, f,
    g, h, i, j, k, l,
    m, n, o, p, q, r,
    s, t, u, v):
    """Train one GanAnomalyDetector configuration and return its score.

    Every argument is an integer. The single-letter names must match the keys
    of the search dictionary handed to ``experiment.differential_evolution``;
    most of them are indices into the lookup tables defined below rather than
    the hyperparameter values themselves.

    Args:
        a: index into the latent-dimension table.
        b: number of discriminator layers (used as-is).
        c: discriminator batch norm switch (0 disables, anything else enables).
        d: index into the dropout-rate table for the discriminator.
        e: index into the learning-rate table for the discriminator.
        f: discriminator extra steps (used as-is).
        g: index into the start-units table for the generator.
        h: number of generator layers (used as-is).
        i: index into the activation table for the generator.
        j: generator batch norm switch (0 disables, anything else enables).
        k: index into the dropout-rate table for the generator.
        l: index into the learning-rate table for the generator.
        m: index into the start-units table for the encoder.
        n: number of encoder layers (used as-is).
        o: index into the activation table for the encoder.
        p: encoder batch norm switch (0 disables, anything else enables).
        q: index into the dropout-rate table for the encoder.
        r: index into the learning-rate table for the encoder.
        s: index into the activation table for the discriminator.
        t: index into the activation table for the discriminator middle layers.
        u: index into the activation table for the generator middle layers.
        v: index into the activation table for the encoder middle layers.

    Returns:
        dict: ``{'metric': mean(e_loss) + mean(g_loss)}`` computed over the
        per-epoch training history.

    Raises:
        KeyError: if an index argument is not a key of its lookup table.
    """
    import numpy as np
    # `tf` and `tensorboard` are only needed by the optional callbacks that are
    # commented out further down; the imports are kept so those can be re-enabled.
    import tensorflow as tf
    from adversarialaml.gan_enc_ano import GanAnomalyDetector, GanAnomalyMonitor
    from hops import tensorboard

    # ---- lookup tables: integer gene -> actual hyperparameter value ----------
    # Fixed: the original literal was {1:8, 2:16, 3:32, 3:64, 4:128}. The repeated
    # key 3 silently discarded the value 32. Keys are now sequential; note that
    # reaching 128 requires the search bound for `a` to go up to 5.
    int_to_latent_dim = {1: 8, 2: 16, 3: 32, 4: 64, 5: 128}
    int_to_start_n_units = {1: 32, 2: 64, 3: 128}

    int_dropout_rate = {
        1: 0.1,
        2: 0.15,
        3: 0.2,
        4: 0.25,
        5: 0.3,
        6: 0.35,
        7: 0.4,
        8: 0.45,
        9: 0.5,
        10: 0.55,
        11: 0.6,
    }

    int_to_learning_rate = {
        1: 0.00001,
        2: 0.001,
        3: 0.0015,
        4: 0.002,
        5: 0.0025,
        6: 0.003,
        7: 0.0035,
        8: 0.004,
        9: 0.0045,
        10: 0.005,
        11: 0.0055,
        12: 0.006,
        13: 0.0065,
        14: 0.007,
        15: 0.0075,
        16: 0.008,
        17: 0.0085,
        18: 0.009,
        19: 0.0095,
        20: 0.01,
        21: 0.02,
        22: 0.03,
        23: 0.04,
    }

    int_to_act_fn = {
        1: 'linear',
        2: 'relu',
        3: 'leaky_relu',
        4: 'selu',
        5: 'tanh',
    }

    # ---- decode the genes ------------------------------------------------------
    latent_dim = int_to_latent_dim[a]

    discriminator_n_layers = b
    discriminator_batch_norm = c != 0
    discriminator_dropout_rate = int_dropout_rate[d]
    discriminator_learning_rate = int_to_learning_rate[e]
    discriminator_extra_steps = f
    discriminator_activation_fn = int_to_act_fn[s]
    discriminator_middle_layer_activation_fn = int_to_act_fn[t]

    generator_start_n_units = int_to_start_n_units[g]
    generator_n_layers = h
    generator_activation_fn = int_to_act_fn[i]
    generator_batch_norm = j != 0
    generator_dropout_rate = int_dropout_rate[k]
    generator_learning_rate = int_to_learning_rate[l]
    generator_middle_layer_activation_fn = int_to_act_fn[u]

    encoder_start_n_units = int_to_start_n_units[m]
    encoder_n_layers = n
    encoder_activation_fn = int_to_act_fn[o]
    encoder_batch_norm = p != 0
    encoder_dropout_rate = int_dropout_rate[q]
    encoder_learning_rate = int_to_learning_rate[r]
    encoder_middle_layer_activation_fn = int_to_act_fn[v]

    # Dropout is switched on whenever the corresponding rate is positive.
    discriminator_batch_dropout = discriminator_dropout_rate > 0.0
    # Fixed: the original tested discriminator_dropout_rate here (copy-paste slip).
    generator_batch_dropout = generator_dropout_rate > 0.0
    encoder_batch_dropout = encoder_dropout_rate > 0.0

    # Layer-width strategy is not part of the search; these stay constant.
    discriminator_double_neurons = False
    discriminator_bottleneck_neurons = True
    generator_double_neurons = True
    generator_bottleneck_neurons = False

    # ---- training configuration ------------------------------------------------
    INPUT_DIM = 365        # feature vector width passed to the model and the monitor
    EPOCHS = 20
    BATCH_SIZE = 512
    TOTAL_SAMPLES = 58539  # row count of `ben_td` as printed earlier in the notebook
    STEPS_PER_EPOCH = TOTAL_SAMPLES // BATCH_SIZE

    # `ben_td` and `eval_td` are the notebook-level training dataset handles.
    train_input = ben_td.tf_data(target_name='target', is_training=True)
    train_input_processed = train_input.tf_record_dataset(
        process=True, batch_size=BATCH_SIZE, num_epochs=EPOCHS)

    # NOTE(review): the evaluation feed is also built with is_training=True;
    # confirm this is intended for validation data.
    eval_input = eval_td.tf_data(target_name='target', is_training=True)
    eval_input_processed = eval_input.tf_record_dataset(
        process=True, batch_size=BATCH_SIZE, num_epochs=EPOCHS)

    # ---- build, compile and fit the model --------------------------------------
    gan_anomaly_detector = GanAnomalyDetector(
        input_dim=INPUT_DIM,
        latent_dim=latent_dim,

        discriminator_start_n_units=INPUT_DIM,
        discriminator_n_layers=discriminator_n_layers,
        discriminator_activation_fn=discriminator_activation_fn,
        discriminator_middle_layer_activation_fn=discriminator_middle_layer_activation_fn,
        discriminator_double_neurons=discriminator_double_neurons,
        discriminator_bottleneck_neurons=discriminator_bottleneck_neurons,
        discriminator_batch_norm=discriminator_batch_norm,
        discriminator_batch_dropout=discriminator_batch_dropout,
        discriminator_dropout_rate=discriminator_dropout_rate,
        discriminator_learning_rate=discriminator_learning_rate,
        discriminator_extra_steps=discriminator_extra_steps,

        generator_start_n_units=generator_start_n_units,
        generator_n_layers=generator_n_layers,
        generator_activation_fn=generator_activation_fn,
        generator_middle_layer_activation_fn=generator_middle_layer_activation_fn,
        generator_double_neurons=generator_double_neurons,
        generator_bottleneck_neurons=generator_bottleneck_neurons,
        generator_batch_norm=generator_batch_norm,
        generator_batch_dropout=generator_batch_dropout,
        generator_dropout_rate=generator_dropout_rate,
        generator_learning_rate=generator_learning_rate,

        encoder_start_n_units=encoder_start_n_units,
        encoder_n_layers=encoder_n_layers,
        encoder_activation_fn=encoder_activation_fn,
        encoder_middle_layer_activation_fn=encoder_middle_layer_activation_fn,
        encoder_batch_norm=encoder_batch_norm,
        encoder_batch_dropout=encoder_batch_dropout,
        encoder_dropout_rate=encoder_dropout_rate,
        encoder_learning_rate=encoder_learning_rate,
    )

    # Compile the WGAN model.
    gan_anomaly_detector.compile()

    callbacks = [
        GanAnomalyMonitor(batch_size=1, latent_dim=latent_dim, input_dim=INPUT_DIM),
        #tf.keras.callbacks.TensorBoard(log_dir=tensorboard.logdir()),
        #tf.keras.callbacks.ModelCheckpoint(filepath=tensorboard.logdir()),
    ]

    # Fixed: pass the callback list itself; the original wrapped it in a second
    # list (`callbacks=[callbacks]`).
    history = gan_anomaly_detector.fit(
        train_input_processed,
        callbacks=callbacks,
        epochs=EPOCHS,
        steps_per_epoch=STEPS_PER_EPOCH,
        validation_data=eval_input_processed,
        validation_steps=1,
    )

    # Score reported to the optimiser: mean encoder loss plus mean generator loss.
    metrics = {
        'metric': np.mean(history.history["e_loss"]) + np.mean(history.history["g_loss"])
    }

    return metrics

## The search space is defined as a dictionary that maps each hyperparameter name to a two-element list of bounds

## Use the experiment wrapper function defined above to run the Hops training experiments.

In [5]:
from hops import experiment
def hyperparam_search():
    """Run the differential-evolution search over `experiment_wrapper`.

    Each key of the bounds dictionary is the name of one `experiment_wrapper`
    argument and maps to a two-element integer list (presumably the
    [lower, upper] bounds of that gene; confirm against the hops docs).

    Returns:
        tuple: ``(log_dir, best_params)`` as produced by
        ``experiment.differential_evolution``.
    """
    bounds = {
        # latent dimension + discriminator
        'a': [1, 4], 'b': [2, 6], 'c': [0, 1],
        'd': [1, 11], 'e': [1, 23], 'f': [1, 4],
        # generator
        'g': [1, 3], 'h': [2, 6], 'i': [2, 5],
        'j': [0, 1], 'k': [1, 11], 'l': [1, 23],
        # encoder
        'm': [1, 3], 'n': [2, 6], 'o': [2, 5],
        'p': [0, 1], 'q': [1, 11], 'r': [1, 23],
        # activation choices (discriminator and the three middle-layer settings)
        's': [1, 4], 't': [1, 4], 'u': [1, 4], 'v': [1, 4],
    }

    experiment_log_dir, winning_params = experiment.differential_evolution(
        experiment_wrapper,
        bounds,
        name='aml_gan_evo_2',
        description='Evolutionary search AML GAN',
        local_logdir=True,
        population=10,
        generations=10,
        direction='min',
    )
    return experiment_log_dir, winning_params

In [None]:
# Launch the search; `hyperparam_search` returns the experiment log directory
# and the best parameter set found.
log_dir, best_params = hyperparam_search()