# Iceberg Classification Step 2: Model Training with Single GPU
This notebook demonstrates how to:
- read training data (``TFRecord``) from the ``feature store``
- train a model on ``TFRecord`` data with a single GPU

In [2]:
import tensorflow as tf
# Show which TensorFlow version the kernel is running.
print("Version of TensorFlow is {}".format(tf.__version__))

Version of TensorFlow is 1.14.0

In [3]:
from hops import featurestore
from hops import experiment

In [4]:
def create_tf_dataset(tfrecord_path, name_list):
    """Build a shuffled, batched, repeated ``tf.data`` pipeline from a feature store training dataset.

    Args:
        :tfrecord_path: name of the training dataset, resolved to a directory
            through the feature store.
        :name_list: four feature names; the first three are stacked into the
            image tensor and the fourth is used as the label.

    Returns:
        A dataset of ``(image, label)`` batches, where each image is reshaped to
        ``[75, 75, 3]`` and each label is a one-element float32 list.

    Reads the module-level ``SHUFFLE_BUFFER_SIZE``, ``BATCH_SIZE`` and
    ``NUM_EPOCHS`` when called.
    """
    records_dir = featurestore.get_training_dataset_path(tfrecord_path)
    part_files = tf.gfile.Glob(records_dir + "/part-r-*")
    raw_records = tf.data.TFRecordDataset(part_files)
    # The schema tells tf.parse_single_example how to decode each serialized record.
    record_schema = featurestore.get_training_dataset_tf_record_schema(tfrecord_path)

    def decode(serialized_example):
        parsed = tf.parse_single_example(serialized_example, record_schema)
        # Stack the three band features side by side, then fold them into an image.
        # NOTE(review): assumes each band holds 75*75 values -- confirm against the schema.
        channels = [parsed[name_list[i]] for i in range(3)]
        image = tf.reshape(tf.stack(channels, axis=1), [75, 75, 3])
        label = [tf.cast(parsed[name_list[3]], tf.float32)]
        return image, label

    decoded = raw_records.map(decode)
    shuffled = decoded.shuffle(SHUFFLE_BUFFER_SIZE)
    batched = shuffled.batch(BATCH_SIZE)
    return batched.repeat(NUM_EPOCHS)

In [5]:
def create_model():
    """Build the (uncompiled) Keras CNN used for binary iceberg classification.

    Architecture, in order:
      * four conv stages (64, 128, 128, 64 filters), each a 3x3 ReLU convolution
        followed by 2x2 max-pooling with stride 2 and 20% dropout;
      * flatten;
      * two dense stages (512 and 256 units), each followed by ReLU and 20% dropout;
      * a single-unit dense layer with a sigmoid activation.

    The first convolution takes its input shape from the module-level
    ``INPUT_SHAPE``.

    Returns:
        A ``tf.keras.models.Sequential`` model.
    """
    layers = tf.keras.layers
    model = tf.keras.models.Sequential()

    # Convolutional feature extractor; only the first stage declares the input shape.
    for stage, filters in enumerate((64, 128, 128, 64)):
        first_layer_kwargs = {'input_shape': INPUT_SHAPE} if stage == 0 else {}
        model.add(layers.Conv2D(filters, kernel_size=(3, 3), activation='relu', **first_layer_kwargs))
        model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
        model.add(layers.Dropout(0.2))

    # Flatten the feature maps for the dense head.
    model.add(layers.Flatten())

    # Fully connected head.
    for units in (512, 256):
        model.add(layers.Dense(units))
        model.add(layers.Activation('relu'))
        model.add(layers.Dropout(0.2))

    # Sigmoid output for the binary label.
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    return model

In [6]:
def train_fn():
    """Train the iceberg classifier, evaluate it on the test set, export it, and return accuracy.

    The Keras model from ``create_model`` is compiled with Adam
    (``LEARNING_RATE``) and binary cross-entropy, converted to an Estimator
    whose ``model_dir`` is the hops TensorBoard log directory, and run through
    ``tf.estimator.train_and_evaluate``. The trained estimator is then exported
    as version 1 via ``export_model``.

    Returns:
        The accuracy reported by the evaluation run.

    Raises:
        RuntimeError: if the evaluation produced no metrics.
        KeyError: if the evaluation metrics contain no accuracy entry.
    """
    from hops import tensorboard

    train_tfrecord_path = "train_tfrecords_iceberg_classification_dataset"
    train_name_list = ["band_1", "band_2", "band_avg", "is_iceberg"]

    test_tfrecord_path = "test_tfrecords_iceberg_classification_dataset"
    test_name_list = ["band_1", "band_2", "band_avg", "is_iceberg"]

    model = create_model()
    model.compile(optimizer=tf.train.AdamOptimizer(LEARNING_RATE), loss='binary_crossentropy',  metrics=['accuracy'])

    run_config = tf.estimator.RunConfig(
            train_distribute=tf.contrib.distribute.MirroredStrategy())

    tf.keras.backend.set_learning_phase(True)
    keras_estimator = tf.keras.estimator.model_to_estimator(keras_model=model, config=run_config, model_dir=tensorboard.logdir())

    # The datasets are built inside the input_fn lambdas so that they are
    # created in the graph the Estimator sets up for each phase.
    # Evaluation reads the *test* TFRecords so the reported accuracy is a
    # held-out metric rather than training accuracy.
    result = tf.estimator.train_and_evaluate(
        keras_estimator,
        train_spec=tf.estimator.TrainSpec(input_fn=lambda: create_tf_dataset(train_tfrecord_path, train_name_list)),
        eval_spec=tf.estimator.EvalSpec(input_fn=lambda: create_tf_dataset(test_tfrecord_path, test_name_list)))

    export_model(keras_estimator, 1)

    # In local (non-distributed) mode train_and_evaluate returns a tuple of
    # (evaluation metrics, export results); accept a bare dict as well.
    metrics = result[0] if isinstance(result, tuple) else result
    if not metrics:
        raise RuntimeError("train_and_evaluate returned no evaluation metrics")

    # Keras names the metric 'acc' in TF 1.x and 'accuracy' in later versions.
    for key in ("accuracy", "acc"):
        if key in metrics:
            return metrics[key]
    raise KeyError("no accuracy metric in evaluation results: {}".format(sorted(metrics)))

In [None]:
def export_model(classifier, version):
    """
    Exports the trained model as a SavedModel and registers it with hops serving

    The SavedModel is first written under the current working directory and
    then handed to ``serving.export`` under the name "icebergmodel",
    overwriting any existing export of the same version.

    Args:
        :classifier: the model to export
        :version: version of the model to export
    """
    import os
    from hops import serving

    def _serving_input_receiver_fn():
        # The key ("conv2d_input") should be the same as the input key used
        # when building a prediction request.
        # NOTE(review): it presumably also has to match the Keras model's input
        # name -- confirm if the first layer of the model changes.
        receiver_tensors = {"conv2d_input": tf.placeholder(dtype=tf.float32, shape=[1, 75, 75, 3])}
        return tf.estimator.export.ServingInputReceiver(receiver_tensors, receiver_tensors)

    local_export_dir = os.getcwd()
    exported_path = classifier.export_savedmodel(local_export_dir, _serving_input_receiver_fn)

    # The export path may come back as bytes; serving.export is given a str.
    if isinstance(exported_path, bytes):
        exported_path = exported_path.decode("utf-8")
    serving.export(exported_path, "icebergmodel", version, overwrite=True)

In [27]:
# def export_model(classifier, version):
#     """
#     Exports trained model 
    
#     Args:
#         :classifier: the model to export
#         :version: version of the model to export
#     """
#     from tensorflow_transform.tf_metadata import schema_utils
#     from hops import hdfs
#     import tensorflow_model_analysis as tfma
#     import tensorflow_metadata as tfm

#     def _serving_input_receiver_fn():
#         # key (e.g. 'examples') should be same with the inputKey when you 
#         # buid the request for prediction
#         receiver_tensors = {"conv2d_input":tf.placeholder(dtype=tf.float32,shape=[1,75,75,3])}
#         return tf.estimator.export.ServingInputReceiver(receiver_tensors, receiver_tensors)
#     def eval_input_receiver_fn():
#         # key (e.g. 'examples') should be same with the inputKey when you 
#         # buid the request for prediction
#         receiver_tensors = {"examples":tf.placeholder(dtype=tf.float32,shape=[1,75,75,3])}
#         # Extract feature spec from the schema.
#         hdfs.copy_to_local('hdfs:///Projects/ExtremeEarth/ExtremeEarth_Training_Datasets/train_tfrecords_iceberg_classification_dataset_1/tf_record_schema.txt', '')
#         with open('tf_record_schema.txt','r') as f:
#             schema = f.read()
#             schema = tfm.proto.v0.schema_pb2.Schema().FromString(schema)
# #         raw_feature_spec = schema_utils.schema_as_feature_spec(schema).feature_spec
#         conv2d_input = tf.feature_column.numeric_column('conv2d_input')
#         feature_spec =  tf.feature_column.make_parse_example_spec(conv2d_input)
#         serialized_tf_example = tf.placeholder(dtype=tf.float32,shape=[1,75,75,3], name='input_example_tensor')
#         features = tf.io.parse_example(receiver_tensors, feature_spec)

#         # First we deserialize our examples using the raw schema.
# #         features = tf.parse_example(serialized_tf_example, raw_feature_spec)

#         return tfma.export.EvalInputReceiver(receiver_tensors=receiver_tensors, features=features, labels=features['is_iceberg'])


#     from hops import serving
#     import os
#     local_export_dir = os.getcwd()
#     exported_path = classifier.export_savedmodel(local_export_dir, _serving_input_receiver_fn)
    
#     # Also export the EvalSavedModel
#     tfma.export.export_eval_savedmodel(
#     estimator=classifier, export_dir_base="hdfs:///Projects/ExtremeEarth/tfx/tfma",
#     eval_input_receiver_fn=eval_input_receiver_fn)

#     exported_path = exported_path.decode("utf-8")
#     serving.export(exported_path, "icebergmodel", version, overwrite=True)

In [28]:
# Hyperparameters for the TFRecord input pipeline (read by create_tf_dataset)
# NUM_EPOCHS = 150
NUM_EPOCHS = 1 # number of times the dataset is repeated; kept at 1 because the demo is limited to CPU
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 10000
# Learning rate passed to the Adam optimizer in train_fn
LEARNING_RATE = 0.001
# Input shape of the model (first Conv2D layer in create_model)
INPUT_SHAPE= (75, 75, 3)

In [29]:
# Run train_fn as a hops experiment under the given name.
experiment.launch(train_fn, name="Iceberg_classification_with_featurestore_and_TFRecords", local_logdir=False)

An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 7.0 failed 1 times, most recent failure: Lost task 0.0 in stage 7.0 (TID 7, localhost, executor 6): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/srv/hops/spark/python/lib/pyspark.zip/pyspark/worker.py", line 377, in main
    process()
  File "/srv/hops/spark/python/lib/pyspark.zip/pyspark/worker.py", line 372, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/srv/hops/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 2499, in pipeline_func
  File "/srv/hops/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 2499, in pipeline_func
  File "/srv/hops/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 2499, in pipeline_func
  File "/srv/hops/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 352, in func
  File "/srv/hops/spark/python/lib/p

## End of Step 2