In [None]:
import os
import shutil
import tensorflow as tf
import tensorflow_data_validation as tfdv
import tensorflow_model_analysis as tfma
from google.protobuf import text_format 
from tensorflow.python.lib.io import file_io
from tensorflow_transform.beam.tft_beam_io import transform_fn_io
from tensorflow_transform.coders import example_proto_coder
from tensorflow_transform.saved import saved_transform_io
from tensorflow_transform.tf_metadata import dataset_schema
from tensorflow_transform.tf_metadata import schema_utils
from tfx.examples.chicago_taxi.trainer import task
from tfx.examples.chicago_taxi.trainer import taxi
import tensorflow_metadata as tfm

from  hops import hdfs as hopsfs

In [None]:
# Root path of the current Hopsworks project; every other path below hangs off it.
# NOTE(review): exclude_nn_addr=True presumably strips the namenode address from
# the returned path -- confirm against the hops `hdfs.project_path` documentation.
BASE_DIR = hopsfs.project_path(exclude_nn_addr=True)
# Input data, pipeline outputs and scratch space all live under the project's
# 'Resources' dataset.
DATA_DIR = os.path.join(BASE_DIR, 'Resources/data')
OUTPUT_DIR = os.path.join(BASE_DIR, 'Resources/taxi_out')
TMP_DIR = os.path.join(BASE_DIR, 'Resources/taxi_tmp')

# Base dirs containing the raw train and eval data
TRAIN_DATA_DIR = os.path.join(DATA_DIR, 'train')
EVAL_DATA_DIR = os.path.join(DATA_DIR, 'eval')

# Base dir where TFT writes training data, and the file-name prefix of the
# transformed training files inside a run directory
TFT_TRAIN_OUTPUT_BASE_DIR = os.path.join(OUTPUT_DIR, 'tft_train')
TFT_TRAIN_FILE_PREFIX = 'train_transformed'

# Base dir where TFT writes eval data, and the file-name prefix of the
# transformed eval files inside a run directory
TFT_EVAL_OUTPUT_BASE_DIR = os.path.join(OUTPUT_DIR, 'tft_eval')
TFT_EVAL_FILE_PREFIX = 'eval_transformed'

# Base dir for training outputs (one 'run_<id>' sub-directory per training run)
TF_OUTPUT_BASE_DIR = os.path.join(OUTPUT_DIR, 'tf')

# Base dir where TFMA writes eval data
TFMA_OUTPUT_BASE_DIR = os.path.join(OUTPUT_DIR, 'tfma')

# Sub-directory names for exported models. EVAL_MODEL_DIR is joined onto the
# training output dir when the EvalSavedModel is exported; SERVING_MODEL_DIR is
# not referenced by the cells shown here.
SERVING_MODEL_DIR = 'serving_model_dir'
EVAL_MODEL_DIR = 'eval_model_dir'


def get_tft_train_output_dir(run_id):
    """Return the per-run directory under the TFT training output base dir.

    Args:
      run_id: Identifier of the preprocessing run; it becomes the 'run_<id>'
        directory name.
    """
    return _get_output_dir(base_dir=TFT_TRAIN_OUTPUT_BASE_DIR, run_id=run_id)


def get_tft_eval_output_dir(run_id):
    """Return the per-run directory under the TFT eval output base dir.

    Args:
      run_id: Identifier of the preprocessing run; it becomes the 'run_<id>'
        directory name.
    """
    return _get_output_dir(base_dir=TFT_EVAL_OUTPUT_BASE_DIR, run_id=run_id)


def get_tf_output_dir(run_id):
    """Return the per-run directory under the training output base dir.

    Args:
      run_id: Identifier of the training run; it becomes the 'run_<id>'
        directory name.
    """
    return _get_output_dir(base_dir=TF_OUTPUT_BASE_DIR, run_id=run_id)

def get_tfma_output_dir(run_id):
    """Return the per-run directory under the TFMA output base dir.

    Args:
      run_id: Identifier of the analysis run; it becomes the 'run_<id>'
        directory name.
    """
    return _get_output_dir(base_dir=TFMA_OUTPUT_BASE_DIR, run_id=run_id)

def _get_output_dir(base_dir, run_id):
    return os.path.join(base_dir, 'run_' + str(run_id))

def get_schema_file():
    """Return the path of the text-format schema file, 'schema.pbtxt', in OUTPUT_DIR."""
    schema_file_name = 'schema.pbtxt'
    return os.path.join(OUTPUT_DIR, schema_file_name)


## Prepare the Model

To use TFMA, export the model into an **EvalSavedModel** by calling ``tfma.export.export_eval_savedmodel``.

``tfma.export.export_eval_savedmodel`` is analogous to ``estimator.export_savedmodel`` but exports the evaluation graph as opposed to the training or inference graph. Notice that one of the inputs is ``eval_input_receiver_fn`` which is analogous to ``serving_input_receiver_fn`` for ``estimator.export_savedmodel``. For more details, refer to the documentation for TFMA on GitHub.

Construct the **EvalSavedModel** after training is completed.

In [None]:
def run_experiment(hparams):
    """Train the model, then export it as a TFMA EvalSavedModel.

    Args:
      hparams: Hyperparameter object. It is handed unchanged to
        `task.train_and_maybe_evaluate`; this function itself reads
        `hparams.output_dir` (parent of the export directory) and
        `hparams.tf_transform_dir` (passed to `eval_input_receiver_fn`).
    """
    # Regular training (and optional evaluation) is delegated to the trainer.
    trained_estimator = task.train_and_maybe_evaluate(hparams)

    # TFMA's special EvalSavedModel gets its own sub-directory of the output dir.
    export_base_dir = os.path.join(hparams.output_dir, EVAL_MODEL_DIR)

    from hops import tensorboard
    # NOTE(review): this rebinds the `logdir` attribute of the hops tensorboard
    # module to the export path -- confirm this is the intended way to point
    # Hopsworks at the exported model.
    tensorboard.logdir = export_base_dir

    def _receiver_fn():
        # Zero-argument adapter: the exporter is given a callable with no
        # parameters, so the transform directory is bound here.
        return eval_input_receiver_fn(hparams.tf_transform_dir)

    tfma.export.export_eval_savedmodel(
        estimator=trained_estimator,
        export_dir_base=export_base_dir,
        eval_input_receiver_fn=_receiver_fn)
    
def eval_input_receiver_fn(working_dir):
    """Build the `tfma.export.EvalInputReceiver` used when exporting the eval graph.

    Serialized examples are parsed with the raw feature spec taken from the
    schema file, then run through the saved tf.Transform function found under
    `working_dir`.

    Args:
      working_dir: Directory that contains the saved transform function in its
        `transform_fn_io.TRANSFORM_FN_DIR` sub-directory.

    Returns:
      A `tfma.export.EvalInputReceiver` whose features contain both the raw
      and the transformed features, and whose label is the transformed label.
    """
    # Load the text-format schema and derive the raw feature spec from it.
    raw_schema = tfm.proto.v0.schema_pb2.Schema()
    text_format.Parse(file_io.read_file_to_string(get_schema_file()), raw_schema)
    raw_feature_spec = schema_utils.schema_as_feature_spec(raw_schema).feature_spec

    serialized_examples = tf.placeholder(
        dtype=tf.string, shape=[None], name='input_example_tensor')

    # Step 1: deserialize the incoming examples using the raw schema.
    features = tf.parse_example(serialized_examples, raw_feature_spec)

    # Step 2: feed the raw features through the saved tf.Transform graph.
    transform_fn_dir = os.path.join(working_dir, transform_fn_io.TRANSFORM_FN_DIR)
    _, transformed_features = saved_transform_io.partially_apply_saved_transform(
        transform_fn_dir, features)

    # NOTE: the model is driven by the transformed features (training works on
    # the materialized output of TFT), but slicing happens on the raw features,
    # so both sets are exposed in the same dict.
    features.update(transformed_features)

    label_key = taxi.transformed_name(taxi.LABEL_KEY)

    return tfma.export.EvalInputReceiver(
        features=features,
        # The key MUST be 'examples'.
        receiver_tensors={'examples': serialized_examples},
        labels=transformed_features[label_key])

## Train and export the model for TFMA


In [None]:
def run_local_experiment(tft_run_id=0, tf_run_id=0, num_layers=4, first_layer_size=100, scale_factor=0.7):
    """Train the model and export it for TFMA.

    Input and output directories are derived from the given run ids. The
    remaining optional parameters let the caller vary the model, e.g. to
    produce several runs for the time series view.

    Args:
      tft_run_id: Run id of the preprocessing step. Identifies the folders
        containing the transformed training and eval data.
      tf_run_id: Run id of this training run. Identifies where the exported
        model will be written.
      num_layers: The number of hidden layers.
      first_layer_size: The size of the first hidden layer.
      scale_factor: The scale factor between consecutive hidden layers.
    """
    # Resolve each run directory once; all paths below are built from these.
    tft_train_dir = get_tft_train_output_dir(tft_run_id)
    tft_eval_dir = get_tft_eval_output_dir(tft_run_id)
    model_dir = get_tf_output_dir(tf_run_id)

    hparams = tf.contrib.training.HParams(
        # Inputs: the tf-transformed, materialized features.
        train_files=os.path.join(tft_train_dir, TFT_TRAIN_FILE_PREFIX + '-00000-of-*'),
        eval_files=os.path.join(tft_eval_dir, TFT_EVAL_FILE_PREFIX + '-00000-of-*'),
        schema_file=get_schema_file(),
        # Output: directory for the trained model.
        job_dir=model_dir,
        tf_transform_dir=tft_train_dir,
        # Output: directory for both the serving model and the eval model that
        # goes into the TFMA evaluation.
        output_dir=model_dir,
        train_steps=10000,
        eval_steps=5000,
        num_layers=num_layers,
        first_layer_size=first_layer_size,
        scale_factor=scale_factor,
        num_epochs=None,
        train_batch_size=40,
        eval_batch_size=40)

    run_experiment(hparams)

In [None]:
from hops import experiment
# Run training as a Hopsworks experiment. The launcher receives a zero-argument
# callable, so the run ids and model shape are bound here by keyword.
experiment.launch(
    lambda: run_local_experiment(
        tft_run_id=0,
        tf_run_id=0,
        num_layers=4,
        first_layer_size=100,
        scale_factor=0.7),
    name='Chicago taxi training',
    description='TFX Chicago taxi model training with Hopsworks Experiment')