In [1]:
spark

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log
21,application_1606981250434_0001,pyspark,idle,Link,Link


SparkSession available as 'spark'.
<pyspark.sql.session.SparkSession object at 0x7f94f7783f90>

In [2]:
# coding=utf-8
# Copyright 2019 The TensorFlow GAN Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Trains a GANEstimator on MNIST data using `train_and_evaluate`."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl import app
from absl import flags

import tensorflow as tf
from tensorflow_gan.examples.anomaly import train_gan_enc_experiment_lib
import pydoop.hdfs as pydoop

In [3]:
def gan_main(
    gp_w=1,     # index into int_to_gp_weight
    d_n=2,      # index into int_to_neurons (discriminator)
    d_n_l=1,    # d_n_layers, used as-is
    d_a_f=1,    # index into int_to_activation_fn (discriminator)
    d_a_d_b=1,  # 1: d_double_neurons, 2: d_bottlneck_neurons, 3: neither
    d_b_n=0,    # d_batch_norm: 0 -> False, anything else -> True
    d_dr=0,     # d_batch_dropout: 0 -> False, anything else -> True
    d_b_r=1,    # index into int_to_dropout_rate (discriminator)
    d_lr=1,     # index into int_to_learning_rate (discriminator)
    d_k_b_r=1,  # d_kernel_bias_reg, used as-is
    d_l1_r=1,   # index into int_to_l1_rate (discriminator)
    d_l2_r=1,   # index into int_to_l2_rate (discriminator)

    g_n=5,      # index into int_to_neurons (generator)
    g_n_l=1,    # g_n_layers, used as-is
    g_a_f=1,    # index into int_to_activation_fn (generator)
    g_a_d_b=1,  # 1: g_double_neurons, 2: g_bottlneck_neurons, 3: neither
    g_b_n=0,    # g_batch_norm: 0 -> False, anything else -> True
    g_dr=0,     # g_batch_dropout: 0 -> False, anything else -> True
    g_b_r=1,    # index into int_to_dropout_rate (generator)
    g_k_b_r=1,  # g_kernel_bias_reg, used as-is
    g_lr=1,     # index into int_to_learning_rate (generator)
    g_l1_r=1,   # index into int_to_l1_rate (generator)
    g_l2_r=1,   # index into int_to_l2_rate (generator)

    en_hp=1     # 1..8: row of the encoder/noise option table
):
    """Build an HParams object from integer-coded hyperparameters and train.

    Every argument is a small integer so that a hyperparameter search can
    treat the whole configuration as integer ranges. Most arguments are
    indices into the lookup tables defined at the top of the body; the
    inline comments on the signature say which table each one uses.

    Args:
        gp_w, d_n, d_a_f, d_b_r, d_lr, d_l1_r, d_l2_r, g_n, g_a_f, g_b_r,
        g_lr, g_l1_r, g_l2_r: keys of the corresponding ``int_to_*`` table.
        d_n_l, g_n_l, d_k_b_r, g_k_b_r: forwarded to HParams unchanged.
        d_a_d_b, g_a_d_b: architecture selector, one of 1, 2 or 3.
        d_b_n, d_dr, g_b_n, g_dr: 0 disables the option, any other value
            enables it.
        g_n: must additionally map to 128, 256, 512, 1024 or 2048 neurons
            (i.e. ``g_n`` in 2..6), because the encoder/noise options are
            only defined for those generator widths.
        en_hp: 1..8, selects (noise_dims, encoder_n_layers) for the chosen
            generator width.

    Returns:
        The value returned by ``train_gan_enc_experiment_lib.train``.

    Raises:
        KeyError: if an index argument is not a key of its lookup table
            (including ``en_hp`` outside 1..8).
        ValueError: if ``d_a_d_b``/``g_a_d_b`` is not 1, 2 or 3, or if
            ``g_n`` maps to a generator width with no encoder options.
    """
    # Imported inside the function as in the original cell.
    # NOTE(review): presumably so the import also happens wherever hops
    # executes this function — confirm.
    import tensorflow as tf
    from tensorflow.python.framework.ops import disable_eager_execution

    disable_eager_execution()

    # Name cannot be empty, cannot exceed 256 characters and must match the
    # regular expression: ^[a-zA-Z0-9]+$
    model_name = 'ganomaly'

    # ------------------------------------------------------------------
    # Lookup tables: integer code -> actual hyperparameter value.
    # ------------------------------------------------------------------
    int_to_neurons = {
        1: 4096, 2: 2048, 3: 1024, 4: 512,
        5: 256, 6: 128, 7: 64, 8: 32,
    }

    int_to_learning_rate = {
        1: 0.00001,
        2: 0.001, 3: 0.0015, 4: 0.002, 5: 0.0025, 6: 0.003, 7: 0.0035,
        8: 0.004, 9: 0.0045, 10: 0.005, 11: 0.0055, 12: 0.006, 13: 0.0065,
        14: 0.007, 15: 0.0075, 16: 0.008, 17: 0.0085, 18: 0.009, 19: 0.0095,
        20: 0.01, 21: 0.02, 22: 0.03, 23: 0.04,
    }

    int_to_dropout_rate = {
        1: 0.1, 2: 0.15, 3: 0.2, 4: 0.25, 5: 0.3, 6: 0.35,
        7: 0.4, 8: 0.45, 9: 0.5, 10: 0.55, 11: 0.6,
    }

    int_to_activation_fn = {
        1: 'relu',
        2: 'leaky_relu',
        3: 'tanh',
        4: 'selu',
        5: 'linear',
    }

    int_to_l1_rate = {
        1: 0.0001, 2: 0.0002, 3: 0.0003, 4: 0.0004, 5: 0.0005, 6: 0.0006,
        7: 0.0007, 8: 0.0008, 9: 0.0009, 10: 0.001, 11: 0.002,
    }

    int_to_l2_rate = {
        1: 0.4, 2: 0.45, 3: 0.5, 4: 0.55, 5: 0.6, 6: 0.65,
        7: 0.7, 8: 0.75, 9: 0.8, 10: 0.9, 11: 0.95,
    }

    int_to_gp_weight = {
        1: 1.0,
        2: 5.0,
        3: 10.0,
    }

    # Architecture selector -> (double_neurons, bottlneck_neurons).
    arch_flags = {
        1: (True, False),
        2: (False, True),
        3: (False, False),
    }

    # Encoder / noise options, keyed by generator width (g_n_neurons).
    # Each row is (noise_dims, encoder_n_layers); rows are selected by
    # en_hp = 1..8. Eight rows are provided for every width so that any
    # en_hp in 1..8 is valid during hyperparameter tuning. The encoder's
    # starting width is always the generator width itself.
    encoder_noise_options = {
        128: ((8, 5), (16, 4), (32, 3), (64, 2),
              (64, 2), (64, 2), (64, 2), (64, 2)),
        256: ((8, 6), (16, 5), (32, 4), (64, 3),
              (128, 2), (128, 2), (128, 2), (128, 2)),
        512: ((8, 7), (16, 6), (32, 5), (64, 4),
              (128, 3), (256, 2), (256, 2), (256, 2)),
        1024: ((8, 8), (16, 7), (32, 6), (64, 5),
               (128, 4), (256, 3), (512, 2), (512, 2)),
        2048: ((8, 9), (16, 8), (32, 7), (64, 6),
               (128, 5), (256, 4), (512, 3), (1024, 2)),
    }

    # ------------------------------------------------------------------
    # ML hyperparameters.
    # ------------------------------------------------------------------
    n_epochs = 1
    data_size = 300
    batch_size = 16
    g_output_dim = 365
    d_output_dim = 1
    feature_dim = 1
    time_steps = 365
    timeseries = False

    gp_weight = int_to_gp_weight[gp_w]

    # Discriminator.
    d_n_neurons = int_to_neurons[d_n]
    d_n_layers = d_n_l
    d_activation_fn = int_to_activation_fn[d_a_f]
    if d_a_d_b not in arch_flags:
        raise ValueError(
            "d_a_d_b must be 1, 2 or 3, got {!r}".format(d_a_d_b))
    d_double_neurons, d_bottlneck_neurons = arch_flags[d_a_d_b]
    d_batch_norm = not (d_b_n == 0)
    d_batch_dropout = not (d_dr == 0)
    d_dropout_rate = int_to_dropout_rate[d_b_r]
    d_kernel_bias_reg = d_k_b_r
    discriminator_lr = int_to_learning_rate[d_lr]
    d_l1_rate = int_to_l1_rate[d_l1_r]
    d_l2_rate = int_to_l2_rate[d_l2_r]

    # Generator.
    g_n_neurons = int_to_neurons[g_n]
    g_n_layers = g_n_l
    g_activation_fn = int_to_activation_fn[g_a_f]
    if g_a_d_b not in arch_flags:
        raise ValueError(
            "g_a_d_b must be 1, 2 or 3, got {!r}".format(g_a_d_b))
    g_double_neurons, g_bottlneck_neurons = arch_flags[g_a_d_b]
    g_batch_norm = not (g_b_n == 0)
    g_batch_dropout = not (g_dr == 0)
    g_dropout_rate = int_to_dropout_rate[g_b_r]
    g_kernel_bias_reg = g_k_b_r
    generator_lr = int_to_learning_rate[g_lr]
    g_l1_rate = int_to_l1_rate[g_l1_r]
    g_l2_rate = int_to_l2_rate[g_l2_r]

    # Encoder / noise: these values are tightly related, so noise_dims and
    # encoder_n_layers are chosen jointly from the generator width and en_hp.
    if g_n_neurons not in encoder_noise_options:
        raise ValueError(
            "g_n={!r} selects {} generator neurons, but encoder options are "
            "only defined for {}".format(
                g_n, g_n_neurons, sorted(encoder_noise_options)))
    # Re-key the rows 1..8 so an out-of-range en_hp raises KeyError instead
    # of silently wrapping around via negative indexing.
    en_hp_rows = dict(enumerate(encoder_noise_options[g_n_neurons], start=1))
    noise_dims, encoder_n_layers = en_hp_rows[en_hp]
    encoder_start_num_neurons = g_n_neurons

    joint_train = False

    # ------------------------------------------------------------------
    # ML infrastructure.
    # ------------------------------------------------------------------
    experiment_type = 'train'
    num_train_steps = 50
    num_eval_steps = 1

    num_summary_steps = 1
    log_step_count_steps = 1
    save_checkpoints_steps = 1

    num_reader_parallel_calls = 1
    use_dummy_data = False

    # ------------------------------------------------------------------
    # Input data: every part file under the train / eval TFRecord dirs.
    # ------------------------------------------------------------------
    data_dir = "hdfs:///Projects/amlsim/gan_sim/"
    ben_dataset_dir = pydoop.path.abspath(data_dir + "train.tfrecord")
    ben_input_files = tf.io.gfile.glob(ben_dataset_dir + "/part-r-*")
    eval_dataset_dir = pydoop.path.abspath(data_dir + "eval.tfrecord")
    eval_input_files = tf.io.gfile.glob(eval_dataset_dir + "/part-r-*")

    training_dataset = ben_input_files
    eval_dataset = eval_input_files
    label_name = "target"

    # HParams is filled positionally; the order below must not change.
    hparams = train_gan_enc_experiment_lib.HParams(
        model_name,

        n_epochs,
        data_size,
        batch_size,
        noise_dims,
        g_output_dim,
        d_output_dim,
        feature_dim,
        time_steps,
        timeseries,

        gp_weight,

        d_n_neurons,
        d_n_layers,
        d_activation_fn,
        d_double_neurons,
        d_bottlneck_neurons,
        d_batch_norm,
        d_batch_dropout,
        d_dropout_rate,
        d_kernel_bias_reg,
        discriminator_lr,
        d_l1_rate,
        d_l2_rate,

        g_n_neurons,
        g_n_layers,
        g_activation_fn,
        g_double_neurons,
        g_bottlneck_neurons,
        g_batch_norm,
        g_batch_dropout,
        g_dropout_rate,
        g_kernel_bias_reg,
        generator_lr,
        g_l1_rate,
        g_l2_rate,

        encoder_start_num_neurons,
        encoder_n_layers,

        joint_train,

        experiment_type,

        num_train_steps,
        num_eval_steps,
        num_summary_steps,
        log_step_count_steps,
        save_checkpoints_steps,

        training_dataset,
        eval_dataset,
        label_name,

        num_reader_parallel_calls,
        use_dummy_data,

        # NOTE(review): the meaning of these two trailing positional values
        # is not visible in this notebook — confirm against HParams.
        0.5,
        1
    )

    eval_result = train_gan_enc_experiment_lib.train(hparams)
    return eval_result
#     reporter.broadcast(metric=eval_result["loss"])
#     return eval_result["loss"]

In [4]:
#from hops import experiment
#from hops.experiment import Direction
#def hyperparam_search():
#    search_dict = {
#        
#                'gp_w': [1,3],   # gp_weight
#                'd_n': [2,4],    # d_n_neurons
#                'd_n_l': [1,4],  # d_n_layers
#                'd_a_f': [1,4],  # d_activation_fn
#                'd_a_d_b': [1,3],  # d_double_neurons or d_bottlneck_neurons
#                'd_b_n': [0,1],  # d_batch_norm
#                'd_dr': [0,1],   # d_batch_dropout
#                'd_b_r': [1,4],  # d_dropout_rate
#                'd_lr': [1,4],   # d_learning_rate
#                'd_k_b_r': [1,4], # d_kernel_bias_reg
#                'd_l1_r': [1,4], # int_to_l1_rate
#                'd_l2_r': [1,4], # int_to_l2_rate
#
#                'g_n': [5,6],    # g_n_neurons
#                'g_n_l': [1,4],  # g_n_layers
#                'g_a_f': [1,4],  # g_activation_fn
#                'g_a_d_b': [1,3],  # g_double_neurons or g_bottlneck_neurons
#                'g_b_n': [0,1],  # g_batch_norm
#                'g_dr': [0,1],   # g_batch_dropout
#                'g_b_r': [1,4],  # g_dropout_rate
#                'g_k_b_r': [1,4], # g_kernel_bias_reg
#                'g_lr': [1,4],    # generator_lr
#                'g_l1_r': [1,4], #g_l1_rate
#                'g_l2_r': [1,4], #g_l2_rate
#
#                'en_hp': [1,8]  # encoder_n_layers        
#
#    }
#    
#    log_dir, best_params = experiment.differential_evolution(
#    gan_main, 
#    search_dict, 
#    name='gan_enc_search', 
#    description='GAN anomaly encoder search',
#    local_logdir=True, 
#    population=8,
#    generations = 10,
#    direction=Direction.MIN, 
#    optimization_key='loss'    
#    )
#    return log_dir, best_params
#
#log_dir, best_params = hyperparam_search()
#

In [13]:
from hops import experiment
# experiment.collective_all_reduce(main)
# Hand gan_main to hops' experiment.launch. No arguments are supplied here,
# so presumably gan_main runs with its default hyperparameter indices —
# confirm against the hops API.
# NOTE(review): the output recorded for this cell is a Spark job failure
# whose traceback starts with a QueueClosedError raised in TensorFlow's
# event_file_writer; the run did not complete successfully.
experiment.launch(gan_main)

An error was encountered:
An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1.0 (TID 7, ip-10-0-0-88.us-west-2.compute.internal, executor 2): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/srv/hops/anaconda/envs/theenv/lib/python3.7/site-packages/tensorflow/python/summary/writer/event_file_writer.py", line 133, in _try_put
    self._event_queue.put(item)
  File "/srv/hops/anaconda/envs/theenv/lib/python3.7/site-packages/tensorflow/python/summary/writer/event_file_writer.py", line 287, in put
    raise QueueClosedError()
tensorflow.python.summary.writer.event_file_writer.QueueClosedError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/srv/hops/spark/python/lib/pyspark.zip/pyspark/worker.py", line

In [6]:
# result = experiment.lagom(map_fun=main, 
#                            searchspace=sp, 
#                            optimizer='randomsearch', 
#                            direction='min',
#                            num_trials=15, 
#                            name='gan_enc_search', 
#                            hb_interval=5, 
#                            es_interval=5,
#                            es_min=5
#                           )

WARN: Can't reach Maggy server. No progress information and logs available. Job continues running anyway.


In [7]:
from hops import model
from hops.model import Metric
# Name used to look up the model and its serving in the cells that follow;
# it is the same string as model_name inside gan_main.
MODEL_NAME="ganomaly"
# Metric name passed to model.get_best_model when selecting a model version.
EVALUATION_METRIC="encoder_loss"

In [8]:
# Ask hops for the best model stored under MODEL_NAME, ranked by
# EVALUATION_METRIC with Metric.MIN (presumably "lowest value wins" —
# confirm against the hops API).
# NOTE(review): the recorded output shows this call raising ModelNotFound,
# so best_model is left undefined for the cells that follow.
best_model = model.get_best_model(MODEL_NAME, EVALUATION_METRIC, Metric.MIN)

An error was encountered:
No model with name ganomaly and metric encoder_loss could be found.
Traceback (most recent call last):
  File "/srv/hops/anaconda/envs/theenv/lib/python3.7/site-packages/hops/model.py", line 63, in get_best_model
    raise ModelNotFound("No model with name {} and metric {} could be found.".format(name, metric))
hops.model.ModelNotFound: No model with name ganomaly and metric encoder_loss could be found.



In [None]:
# Show the name, version and metrics of the selected model.
# Requires best_model to have been assigned by the get_best_model cell;
# the recorded output is a NameError because that lookup had failed.
print('Model name: ' + best_model['name'])
print('Model version: ' + str(best_model['version']))
print(best_model['metrics'])

An error was encountered:
name 'best_model' is not defined
Traceback (most recent call last):
NameError: name 'best_model' is not defined



In [None]:
from hops import serving

In [None]:
# Create (or update) the TensorFlow serving for the selected model version.
# Needs best_model from the get_best_model cell.
model_path = "/Models/" + best_model['name']
response = serving.create_or_update(
    model_path,
    MODEL_NAME,
    serving_type="TENSORFLOW",
    model_version=best_model['version'],
)

An error was encountered:
name 'best_model' is not defined
Traceback (most recent call last):
NameError: name 'best_model' is not defined



In [None]:
# List all available servings in the project
for s in serving.get_all():
    print(s.name)

In [20]:
# Get serving status
serving.get_status(MODEL_NAME)

'Stopped'

In [21]:
# Start the serving only when it reports 'Stopped'; any other status is
# left untouched.
if serving.get_status(MODEL_NAME) == 'Stopped':
    serving.start(MODEL_NAME)

Starting serving with name: ganomaly...
Serving with name: ganomaly successfully started

In [22]:
import time

# Poll until the serving reports "Running". The wait is bounded so that a
# serving which never starts cannot hang the notebook forever.
SERVING_START_TIMEOUT_S = 300
SERVING_POLL_INTERVAL_S = 5

_serving_deadline = time.monotonic() + SERVING_START_TIMEOUT_S
while serving.get_status(MODEL_NAME) != "Running":
    if time.monotonic() >= _serving_deadline:
        raise TimeoutError(
            "Serving {!r} did not reach 'Running' within {} seconds".format(
                MODEL_NAME, SERVING_START_TIMEOUT_S))
    time.sleep(SERVING_POLL_INTERVAL_S)  # Let the serving startup correctly
# Extra grace period after the status first reads "Running".
time.sleep(5)

In [23]:
# Kafka topic that hops reports for this serving.
# NOTE(review): TOPIC_NAME is not referenced by any cell visible in this
# notebook — confirm whether it is still needed.
TOPIC_NAME = serving.get_kafka_topic(MODEL_NAME)


In [25]:
import numpy as np

# Send 20 inference requests, each carrying one random 365-value float32
# sample, and print every response.
for i in range(20):
    sample = np.random.rand(365).astype(np.float32).tolist()
    data = {'serving_default': 'real_input', "instances": [sample]}
    response = serving.make_inference_request(MODEL_NAME, data)
    print(response)

{'predictions': [1724.10559]}
{'predictions': [347.750244]}
{'predictions': [744.655]}
{'predictions': [2042.27856]}
{'predictions': [557.614258]}
{'predictions': [315.85498]}
{'predictions': [1655.20459]}
{'predictions': [316.268921]}
{'predictions': [1744.2124]}
{'predictions': [1149.82349]}
{'predictions': [1569.00781]}
{'predictions': [468.74588]}
{'predictions': [960.264221]}
{'predictions': [1766.28601]}
{'predictions': [985.822876]}
{'predictions': [1204.39709]}
{'predictions': [399.651917]}
{'predictions': [440.584076]}
{'predictions': [1225.974]}
{'predictions': [208.740234]}

In [26]:
# Four random 365-value float32 samples as plain Python lists, distributed
# as an RDD through the kernel-provided SparkContext `sc`.
l = [np.random.rand(365).astype(np.float32).tolist() for _ in range(4)]
rdd = sc.parallelize(l)

In [27]:
# Wrap each sample in a request payload, send one inference request per
# record, and collect the responses into a DataFrame.
people = (
    rdd
    .map(lambda sample: {'serving_default': 'real_input', "instances": [sample]})
    .map(lambda payload: serving.make_inference_request(MODEL_NAME, payload))
    .toDF()
)
                 
#rdd.map(lambda x: np.append(arr, x))



In [28]:
people.show()

+------------+
| predictions|
+------------+
|[686.622559]|
|[1406.82849]|
|[681.662476]|
|[520.253235]|
+------------+