# In this notebook we perform hyperparameter tuning and training of an adversarial anomaly detection model.
#### For more details about this model, refer to https://arxiv.org/pdf/1905.11034.pdf.

In [1]:
# Evaluating `spark` as the cell's only expression triggers the Spark application
# start-up and displays the session object (see the cell output below).
spark

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log
10,application_1612044880670_0011,pyspark,idle,Link,Link


SparkSession available as 'spark'.
<pyspark.sql.session.SparkSession object at 0x7f516e9b1a50>

## Connect to hsfs and retrieve datasets for training and evaluation 

In [2]:
import hsfs
# Create a connection to the feature store service (no arguments are passed,
# so whatever defaults hsfs.connection() uses apply).
connection = hsfs.connection()
# Get the feature store handle for the project's feature store
fs = connection.get_feature_store()

# Fetch the two training datasets by name. The second argument (1) is presumably
# the dataset version -- confirm against the hsfs API.
# `ben_td` is read inside experiment_wrapper as the training input; `eval_td` is
# only referenced from commented-out code in the cells shown here.
ben_td = fs.get_training_dataset("gan_non_sar_training_df", 1)
eval_td = fs.get_training_dataset("gan_eval_df", 1)

Connected. Call `.close()` to terminate connection gracefully.

In [3]:
from hops import experiment
from hops import hdfs
import json
# The block below is disabled code kept as a bare string literal, so none of it runs.
# If enabled it would load a JSON file of hyperparameters from
# "Resources/embeddings_best_hp.json" and wrap every value in a single-element list
# to build `args_dict`. Because the literal is the last expression of the cell, the
# notebook echoes its repr as the cell output.
"""
best_hyperparams_path = "Resources/embeddings_best_hp.json"
best_hyperparams = json.loads(hdfs.load(best_hyperparams_path))
args_dict = {}
for key in best_hyperparams.keys():
    args_dict[key] = [best_hyperparams[key]]
"""    

'\nbest_hyperparams_path = "Resources/embeddings_best_hp.json"\nbest_hyperparams = json.loads(hdfs.load(best_hyperparams_path))\nargs_dict = {}\nfor key in best_hyperparams.keys():\n    args_dict[key] = [best_hyperparams[key]]\n'

In [4]:
# Hard-coded settings, each wrapped in a single-element list. experiment_wrapper reads
# args_dict['emb_size'][0] as both the model input dimension and the discriminator's
# starting unit count; the walk_* entries are not read in the cells shown here.
args_dict = {'walk_number': [40], 'walk_length': [5], 'emb_size': [128]}

### Define the Hopsworks experiment wrapper function, which contains all the training logic.

In [7]:
def experiment_wrapper(
    a,
    b,
    c,
    d, 
    e,
    f,

    g,
    h,
    i,
    j,
    k, 
    l,

    m,
    n,
    o,
    p,
    q, 
    r,
    s):
    """Build and train one GanAnomalyDetector configuration and report its loss.

    The single-letter parameter names must match the keys of the search
    dictionary handed to the hyperparameter search, so they are kept as-is.
    Each argument is an integer code that is decoded as follows:

    Discriminator
        a: latent dimension index (1 -> 16, 2 -> 32, 3 -> 64).
        b: number of discriminator layers (passed through unchanged).
        c: batch-norm flag (0 -> False, anything else -> True).
        d: dropout-rate index (1..11 -> 0.1..0.6 in steps of 0.05).
        e: learning-rate index (1..23, see the table in the body).
        f: discriminator extra steps (passed through unchanged).
        s: activation flag (0 -> "linear", anything else -> "selu").

    Generator
        g: starting-units index (1 -> 16, 2 -> 32, 3 -> 64).
        h: number of generator layers (passed through unchanged).
        i: activation flag (0 -> "tanh", anything else -> "selu").
        j: batch-norm flag (0 -> False, anything else -> True).
        k: dropout-rate index (same table as d).
        l: learning-rate index (same table as e).

    Encoder
        m: starting-units index (1 -> 16, 2 -> 32, 3 -> 64).
        n: number of encoder layers (passed through unchanged).
        o: activation flag (0 -> "tanh", anything else -> "selu").
        p: batch-norm flag (0 -> False, anything else -> True).
        q: dropout-rate index (same table as d).
        r: learning-rate index (same table as e).

    The function also reads two module-level names: ``ben_td`` (the training
    dataset) and ``args_dict`` (for ``emb_size``).

    Returns:
        dict: ``{'metric': <first recorded "d_loss" value from training>}``.

    Raises:
        KeyError: if an index argument is not a key of its lookup table.
    """
    # ---- Lookup tables shared by discriminator, generator and encoder ----
    units_by_index = {1: 16, 2: 32, 3: 64}

    dropout_rate_by_index = {
        1: 0.1,
        2: 0.15,
        3: 0.2,
        4: 0.25,
        5: 0.3,
        6: 0.35,
        7: 0.4,
        8: 0.45,
        9: 0.5,
        10: 0.55,
        11: 0.6,
    }

    learning_rate_by_index = {
        1: 0.00001,
        2: 0.001,
        3: 0.0015,
        4: 0.002,
        5: 0.0025,
        6: 0.003,
        7: 0.0035,
        8: 0.004,
        9: 0.0045,
        10: 0.005,
        11: 0.0055,
        12: 0.006,
        13: 0.0065,
        14: 0.007,
        15: 0.0075,
        16: 0.008,
        17: 0.0085,
        18: 0.009,
        19: 0.0095,
        20: 0.01,
        21: 0.02,
        22: 0.03,
        23: 0.04,
    }

    # ---- Decode the integer-coded arguments ----
    latent_dim = units_by_index[a]

    discriminator_n_layers = b
    discriminator_dropout_rate = dropout_rate_by_index[d]
    discriminator_learning_rate = learning_rate_by_index[e]
    discriminator_extra_steps = f

    generator_start_n_units = units_by_index[g]
    generator_n_layers = h
    generator_dropout_rate = dropout_rate_by_index[k]
    generator_learning_rate = learning_rate_by_index[l]

    encoder_start_n_units = units_by_index[m]
    encoder_n_layers = n
    encoder_dropout_rate = dropout_rate_by_index[q]
    encoder_learning_rate = learning_rate_by_index[r]

    # 0/1 flags -> activation names and booleans.
    discriminator_activation_fn = "linear" if s == 0 else "selu"
    generator_activation_fn = "tanh" if i == 0 else "selu"
    encoder_activation_fn = "tanh" if o == 0 else "selu"

    discriminator_batch_norm = c != 0
    generator_batch_norm = j != 0
    encoder_batch_norm = p != 0

    # Dropout is switched on whenever the decoded rate is positive. Each network
    # uses its OWN rate; the generator flag previously tested the discriminator's
    # rate by mistake.
    discriminator_batch_dropout = discriminator_dropout_rate > 0.0
    generator_batch_dropout = generator_dropout_rate > 0.0
    encoder_batch_dropout = encoder_dropout_rate > 0.0

    # Fixed (non-searched) layer-width policies.
    discriminator_double_neurons = False
    discriminator_bottleneck_neurons = True
    generator_double_neurons = True
    generator_bottleneck_neurons = False

    # Imported here rather than at module level, so they are resolved when the
    # wrapper is actually invoked.
    import tensorflow as tf
    from adversarialaml.gan_enc_ano import GanAnomalyDetector,  GanAnomalyMonitor 
    from hops import tensorboard

    # Set the number of epochs for training. EPOCHS is applied through the dataset's
    # num_epochs argument; fit() below is called without an epochs argument.
    EPOCHS = 10
    BATCH_SIZE = 64

    train_input = ben_td.tf_data(target_name='target', is_training=True)
    train_input_processed = train_input.tf_record_dataset(process=True, batch_size=BATCH_SIZE, num_epochs=EPOCHS)

    #eval_input = ben_td.tf_data(target_name='target', is_training=True)
    #eval_input_processed = eval_input.tf_record_dataset(process=True, batch_size=1, num_epochs=EPOCHS)

    emb_size = args_dict['emb_size'][0]

    # Instantiate the GanAnomalyDetector model.
    gan_anomaly_detector = GanAnomalyDetector(
                input_dim=emb_size,
                latent_dim=latent_dim,

                discriminator_start_n_units=emb_size,
                discriminator_n_layers=discriminator_n_layers,
                discriminator_activation_fn=discriminator_activation_fn,
                discriminator_double_neurons=discriminator_double_neurons,
                discriminator_bottleneck_neurons=discriminator_bottleneck_neurons,
                discriminator_batch_norm=discriminator_batch_norm,
                discriminator_batch_dropout=discriminator_batch_dropout,
                discriminator_dropout_rate=discriminator_dropout_rate,
                discriminator_learning_rate=discriminator_learning_rate,
                discriminator_extra_steps=discriminator_extra_steps,

                generator_start_n_units=generator_start_n_units,
                generator_n_layers=generator_n_layers,
                generator_activation_fn=generator_activation_fn,
                generator_double_neurons=generator_double_neurons,
                generator_bottleneck_neurons=generator_bottleneck_neurons,
                generator_batch_norm=generator_batch_norm,
                generator_batch_dropout=generator_batch_dropout,
                generator_dropout_rate=generator_dropout_rate,
                generator_learning_rate=generator_learning_rate,

                encoder_start_n_units=encoder_start_n_units,
                encoder_n_layers=encoder_n_layers,
                encoder_activation_fn=encoder_activation_fn,
                encoder_batch_norm=encoder_batch_norm,
                encoder_batch_dropout=encoder_batch_dropout,
                encoder_dropout_rate=encoder_dropout_rate,
                encoder_learning_rate=encoder_learning_rate,

    )

    # Compile the WGAN model.
    gan_anomaly_detector.compile()

    # NOTE(review): this list is built but intentionally NOT passed to fit() below,
    # matching the original behaviour. If it is re-enabled, pass `callbacks=callbacks`
    # (not `callbacks=[callbacks]`, which would nest the list).
    callbacks = [
        #GanAnomalyMonitor(batch_size=1, latent_dim=4, input_dim=16, alpha=0.7, real_data=eval_input_processed),
        tf.keras.callbacks.TensorBoard(log_dir=tensorboard.logdir()),
        tf.keras.callbacks.ModelCheckpoint(filepath=tensorboard.logdir()),
    ]

    # Start training the model.
    history = gan_anomaly_detector.fit(train_input_processed) #callbacks=[callbacks]

    # Report the first recorded discriminator loss under the key 'metric'.
    metrics = {'metric': history.history["d_loss"][0]}

    return metrics

## The search space can be instantiated with parameters

## Use the experiment wrapper function above to run the hops training experiments.

In [8]:
from hops import experiment
def hyperparam_search():
    """Run the 'aml_gan_evo' differential-evolution experiment over experiment_wrapper.

    Returns:
        tuple: the two values unpacked from ``experiment.differential_evolution``,
        bound here as the log directory and the best parameters.
    """
    # One two-element list per experiment_wrapper argument, keyed by the argument's
    # name. Presumably each pair is an inclusive [lower, upper] bound -- confirm
    # against the hops differential_evolution documentation.
    bounds = dict(
        # discriminator-related codes
        a=[1, 3],
        b=[2, 6],
        c=[0, 1],
        d=[4, 6],
        e=[1, 4],
        f=[1, 3],
        # generator-related codes
        g=[1, 3],
        h=[2, 6],
        i=[0, 1],
        j=[0, 1],
        k=[1, 6],
        l=[1, 6],
        # encoder-related codes, plus the discriminator activation flag `s`
        m=[1, 3],
        n=[2, 6],
        o=[0, 1],
        p=[0, 1],
        q=[1, 6],
        r=[1, 6],
        s=[0, 1],
    )

    logdir_path, winning_params = experiment.differential_evolution(
        experiment_wrapper,
        bounds,
        generations=10,
        population=8,
        local_logdir=False,
        description='Evolutionary search AML GAN',
        name='aml_gan_evo',
    )
    return logdir_path, winning_params

In [None]:
# Launch the search and keep the two values hyperparam_search() returns.
log_dir, best_params = hyperparam_search()