## TFMA Notebook example

This notebook describes how to export your model for TFMA and demonstrates the analysis tooling it offers.

## Setup

Import necessary packages.

In [0]:
import apache_beam as beam  
import os
import preprocess
import shutil
import tensorflow as tf
import tensorflow_data_validation as tfdv
import tensorflow_model_analysis as tfma
from google.protobuf import text_format 
from tensorflow.python.lib.io import file_io
from tensorflow_transform.beam.tft_beam_io import transform_fn_io
from tensorflow_transform.coders import example_proto_coder
from tensorflow_transform.saved import saved_transform_io
from tensorflow_transform.tf_metadata import dataset_schema
from tensorflow_transform.tf_metadata import schema_utils
from trainer import task
from trainer import taxi

Helper functions and some constants for running the notebook locally.

In [0]:
# Every path below is resolved relative to the directory the notebook runs in.
BASE_DIR = os.getcwd()
DATA_DIR = os.path.join(BASE_DIR, 'data')
OUTPUT_DIR = os.path.join(BASE_DIR, 'chicago_taxi_output')

# Input data, split into a training set and an evaluation set.
TRAIN_DATA_DIR = os.path.join(DATA_DIR, 'train')
EVAL_DATA_DIR = os.path.join(DATA_DIR, 'eval')

# File-name prefixes TFT uses for the transformed training / eval data.
TFT_TRAIN_FILE_PREFIX = 'train_transformed'
TFT_EVAL_FILE_PREFIX = 'eval_transformed'

# Base output directories, one per tool: TFT (train), TFT (eval),
# the trained TF model, and TFMA evaluation results.
TFT_TRAIN_OUTPUT_BASE_DIR = os.path.join(OUTPUT_DIR, 'tft_train')
TFT_EVAL_OUTPUT_BASE_DIR = os.path.join(OUTPUT_DIR, 'tft_eval')
TF_OUTPUT_BASE_DIR = os.path.join(OUTPUT_DIR, 'tf')
TFMA_OUTPUT_BASE_DIR = os.path.join(OUTPUT_DIR, 'tfma')

# Directory names for the exported serving model and the exported eval model.
SERVING_MODEL_DIR = 'serving_model_dir'
EVAL_MODEL_DIR = 'eval_model_dir'


def get_tft_train_output_dir(run_id):
    """Return the TFT training-data output directory for the given run id."""
    return _get_output_dir(base_dir=TFT_TRAIN_OUTPUT_BASE_DIR, run_id=run_id)


def get_tft_eval_output_dir(run_id):
    """Return the TFT eval-data output directory for the given run id."""
    return _get_output_dir(base_dir=TFT_EVAL_OUTPUT_BASE_DIR, run_id=run_id)


def get_tf_output_dir(run_id):
    """Return the TF (training) output directory for the given run id."""
    return _get_output_dir(base_dir=TF_OUTPUT_BASE_DIR, run_id=run_id)

def get_tfma_output_dir(run_id):
    """Return the TFMA output directory for the given run id."""
    return _get_output_dir(base_dir=TFMA_OUTPUT_BASE_DIR, run_id=run_id)

def _get_output_dir(base_dir, run_id):
    return os.path.join(base_dir, 'run_' + str(run_id))

def get_schema_file():
    """Return the path of the schema file (``schema.pbtxt``) inside OUTPUT_DIR."""
    schema_filename = 'schema.pbtxt'
    return os.path.join(OUTPUT_DIR, schema_filename)


Clean up output directories.

In [0]:
# Remove the outputs of earlier runs so that this run starts from a clean slate.
# The TFMA directory is included so results written by a previous session are
# not mixed with the ones produced below.
for stale_output_dir in (TFT_TRAIN_OUTPUT_BASE_DIR,
                         TFT_EVAL_OUTPUT_BASE_DIR,
                         TF_OUTPUT_BASE_DIR,
                         TFMA_OUTPUT_BASE_DIR):
    shutil.rmtree(stale_output_dir, ignore_errors=True)

# The schema is a single file, not a directory: shutil.rmtree cannot delete it
# and, with ignore_errors=True, would fail silently and leave it in place.
if os.path.isfile(get_schema_file()):
    os.remove(get_schema_file())

## Compute and visualize descriptive data statistics

In [0]:
# Compute stats over training data.
train_stats = tfdv.generate_statistics_from_csv(data_location=os.path.join(TRAIN_DATA_DIR, 'data.csv'))

In [0]:
# Visualize training data stats.
tfdv.visualize_statistics(train_stats)

## Infer a schema

In [0]:
# Infer a schema from the training data stats.
schema = tfdv.infer_schema(statistics=train_stats, infer_feature_shape=False)
tfdv.display_schema(schema=schema)

## Check evaluation data for errors

In [0]:
# Compute stats over eval data.
eval_stats = tfdv.generate_statistics_from_csv(data_location=os.path.join(EVAL_DATA_DIR, 'data.csv'))

In [0]:
# Compare stats of eval data with training data.
tfdv.visualize_statistics(lhs_statistics=eval_stats, rhs_statistics=train_stats,
                          lhs_name='EVAL_DATASET', rhs_name='TRAIN_DATASET')

In [0]:
# Check eval data for errors by validating the eval data stats using the previously inferred schema.
anomalies = tfdv.validate_statistics(statistics=eval_stats, schema=schema)
tfdv.display_anomalies(anomalies)

In [0]:
# Update the schema based on the observed anomalies.

# Relax the minimum fraction of values that must come from the domain for feature company.
company = tfdv.get_feature(schema, 'company')
company.distribution_constraints.min_domain_mass = 0.9

# Add new value to the domain of feature payment_type.
# The membership check keeps this cell idempotent: without it, every re-run
# would append another duplicate 'Prcard' entry to the in-memory schema.
payment_type_domain = tfdv.get_domain(schema, 'payment_type')
if 'Prcard' not in payment_type_domain.value:
    payment_type_domain.value.append('Prcard')

# Validate eval stats after updating the schema
updated_anomalies = tfdv.validate_statistics(statistics=eval_stats, schema=schema)
tfdv.display_anomalies(updated_anomalies)

## Freeze the schema

Now that the schema has been reviewed and curated, we will store it in a file to reflect its "frozen" state.

In [0]:
file_io.recursive_create_dir(OUTPUT_DIR)
file_io.write_string_to_file(get_schema_file(), text_format.MessageToString(schema))

## Preprocess Inputs

transform_data is defined in preprocess.py and uses the tensorflow_transform library to perform preprocessing. The same code is used for both local preprocessing in this notebook and preprocessing in the Cloud (via Dataflow).

In [0]:
# Transform eval data
preprocess.transform_data(input_handle=os.path.join(EVAL_DATA_DIR, 'data.csv'),
                          outfile_prefix=TFT_EVAL_FILE_PREFIX, 
                          working_dir=get_tft_eval_output_dir(0),
                          schema_file=get_schema_file(),
                          pipeline_args=['--runner=DirectRunner'])
print('Done')

In [0]:
# Transform training data
preprocess.transform_data(input_handle=os.path.join(TRAIN_DATA_DIR, 'data.csv'),
                          outfile_prefix=TFT_TRAIN_FILE_PREFIX, 
                          working_dir=get_tft_train_output_dir(0),
                          schema_file=get_schema_file(),
                          pipeline_args=['--runner=DirectRunner'])
print('Done')

## Compute statistics over transformed data 

In [0]:
# Compute stats over transformed training data.
TRANSFORMED_TRAIN_DATA = os.path.join(get_tft_train_output_dir(0), TFT_TRAIN_FILE_PREFIX + "*") 
transformed_train_stats = tfdv.generate_statistics_from_tfrecord(data_location=TRANSFORMED_TRAIN_DATA)

In [0]:
# Visualize transformed training data stats and compare to raw training data. 
# Use 'Feature search' to focus on a feature and see statistics pre- and post-transformation.
tfdv.visualize_statistics(transformed_train_stats, train_stats, lhs_name='TRANSFORMED', rhs_name='RAW')

## Prepare the Model

To use TFMA, export the model into an **EvalSavedModel** by calling ``tfma.export.export_eval_savedmodel``.

``tfma.export.export_eval_savedmodel`` is analogous to ``estimator.export_savedmodel`` but exports the evaluation graph as opposed to the training or inference graph. Notice that one of the inputs is ``eval_input_receiver_fn`` which is analogous to ``serving_input_receiver_fn`` for ``estimator.export_savedmodel``. For more details, refer to the documentation for TFMA on Github.

Construct the **EvalSavedModel** after training is completed.

In [0]:
def run_experiment(hparams):
    """Train the model, then export it as a TFMA EvalSavedModel.

    Args:
      hparams: Hyperparameters object. It is passed unchanged to
        ``task.train_and_maybe_evaluate``; this function itself reads
        ``hparams.output_dir`` (the export goes into its EVAL_MODEL_DIR
        sub-directory) and ``hparams.tf_transform_dir``.
    """
    def _receiver_fn():
        # The exporter calls this without arguments, so the transform
        # directory is taken from the enclosing hparams.
        return eval_input_receiver_fn(hparams.tf_transform_dir)

    # Train and evaluate the model as usual.
    trained_estimator = task.train_and_maybe_evaluate(hparams)

    # Export TFMA's special EvalSavedModel.
    tfma.export.export_eval_savedmodel(
        estimator=trained_estimator,
        export_dir_base=os.path.join(hparams.output_dir, EVAL_MODEL_DIR),
        eval_input_receiver_fn=_receiver_fn)
    
def eval_input_receiver_fn(working_dir):
    """Build the EvalInputReceiver used when exporting the EvalSavedModel.

    Serialized tf.Examples are parsed with the raw feature spec derived from
    the notebook-level ``schema`` and then run through the saved TFT transform.

    Args:
      working_dir: Directory that contains the saved transform function in
        its ``transform_fn_io.TRANSFORM_FN_DIR`` sub-directory.

    Returns:
      A ``tfma.export.EvalInputReceiver`` whose features contain both the raw
      and the transformed features and whose label is the transformed label.
    """
    # Batch of serialized tf.Example protos fed in at evaluation time.
    serialized_examples = tf.placeholder(
        dtype=tf.string, shape=[None], name='input_example_tensor')

    # Parse the examples using the feature spec of the raw (untransformed) schema.
    raw_spec = schema_utils.schema_as_feature_spec(schema).feature_spec
    features = tf.parse_example(serialized_examples, raw_spec)

    # Apply the saved TFT transform to the raw features.
    transform_fn_dir = os.path.join(working_dir, transform_fn_io.TRANSFORM_FN_DIR)
    _, transformed = saved_transform_io.partially_apply_saved_transform(
        transform_fn_dir, features)

    # NOTE: The model is driven by transformed features (training works on the
    # materialized output of TFT), but slicing will happen on raw features, so
    # both sets are exposed together.
    features.update(transformed)

    label_key = taxi.transformed_name(taxi.LABEL_KEY)
    return tfma.export.EvalInputReceiver(
        features=features,
        # The key MUST be 'examples'.
        receiver_tensors={'examples': serialized_examples},
        labels=transformed[label_key])

print('Done')

## Train and export the model for TFMA

In [0]:
def run_local_experiment(tft_run_id, tf_run_id, num_layers, first_layer_size, scale_factor):
    """Train a model locally and export it for TFMA.

    The caller selects the input and output directories through run ids. The
    remaining parameters change the model architecture, which is useful for
    producing several models for the time series view.

    Args:
      tft_run_id: The run id of the preprocessing. Identifies the folders
        containing the transformed training and eval data.
      tf_run_id: The run id of this training run. Identifies where the
        exported model will be written to.
      num_layers: The number of hidden layers.
      first_layer_size: The size of the first hidden layer.
      scale_factor: The scale factor between consecutive hidden layers.
    """
    tft_train_dir = get_tft_train_output_dir(tft_run_id)
    tft_eval_dir = get_tft_eval_output_dir(tft_run_id)
    model_output_dir = get_tf_output_dir(tf_run_id)

    hparams = tf.contrib.training.HParams(
        # Inputs: the tf-transformed, materialized features.
        train_files=os.path.join(tft_train_dir, TFT_TRAIN_FILE_PREFIX + '-00000-of-*'),
        eval_files=os.path.join(tft_eval_dir, TFT_EVAL_FILE_PREFIX + '-00000-of-*'),
        schema_file=get_schema_file(),
        # Output: dir for the trained model.
        job_dir=model_output_dir,
        tf_transform_dir=tft_train_dir,
        # Output: dir for both the serving model and the eval model that goes
        # into the TFMA evaluation.
        output_dir=model_output_dir,
        train_steps=10000,
        eval_steps=5000,
        num_layers=num_layers,
        first_layer_size=first_layer_size,
        scale_factor=scale_factor,
        num_epochs=None,
        train_batch_size=40,
        eval_batch_size=40)

    run_experiment(hparams)

print('Done')

In [0]:
run_local_experiment(tft_run_id=0,
                     tf_run_id=0,
                     num_layers=4,
                     first_layer_size=100,
                     scale_factor=0.7)
print('Done')

## Run TFMA to compute metrics
For local analysis, TFMA offers a helper method ``tfma.run_model_analysis``

In [0]:
help(tfma.run_model_analysis)

#### You can also write your own custom pipeline if you want to perform extra transformations on the data before evaluation.

In [0]:
def run_tfma(slice_spec, tf_run_id, tfma_run_id, input_csv, schema_file, add_metrics_callbacks=None):
    """A simple wrapper function that runs tfma locally.

    Reads `input_csv`, converts each row into a serialized tf.Example using the
    schema stored in `schema_file`, evaluates the EvalSavedModel exported by
    training run `tf_run_id`, and writes the results to the TFMA output
    directory of `tfma_run_id`.

    Args:
        slice_spec: The slicing spec for how to slice the data.
        tf_run_id: An id to construct the model directories with.
        tfma_run_id: An id to construct output directories with.
        input_csv: The evaluation data in csv format. The first line is
            treated as a header and skipped.
        schema_file: The file holding a text-serialized schema for the input data.
        add_metrics_callbacks: Optional list of callbacks for computing extra metrics.

    Returns:
        An EvalResult that can be used with TFMA visualization functions.

    Raises:
        IOError: If no exported eval model exists for `tf_run_id`.
    """
    eval_model_base_dir = os.path.join(get_tf_output_dir(tf_run_id), EVAL_MODEL_DIR)

    # Each export lives in its own sub-directory of the base dir. os.walk lists
    # them in arbitrary order, so pick one deterministically instead of taking
    # whichever happens to come first.
    export_names = next(os.walk(eval_model_base_dir), (None, [], None))[1]
    if not export_names:
        raise IOError('No exported eval model found under ' + eval_model_base_dir)
    # Presumably exports are named by a numeric timestamp (verify against the
    # exporter); prefer the largest such name, i.e. the most recent export,
    # and fall back to plain name order otherwise.
    numeric_names = sorted((name for name in export_names if name.isdigit()), key=int)
    latest_export = (numeric_names or sorted(export_names))[-1]
    eval_model_dir = os.path.join(eval_model_base_dir, latest_export)

    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=eval_model_dir,
        add_metrics_callbacks=add_metrics_callbacks)
    schema = taxi.read_schema(schema_file)

    print(eval_model_dir)

    output_path = get_tfma_output_dir(tfma_run_id)

    with beam.Pipeline() as pipeline:
        csv_coder = taxi.make_csv_coder(schema)
        raw_data = (
            pipeline
            | 'ReadFromText' >> beam.io.ReadFromText(
                input_csv,
                coder=beam.coders.BytesCoder(),
                # Number of header lines to skip (an int, not a flag).
                skip_header_lines=1)
            | 'ParseCSV' >> beam.Map(csv_coder.decode))

        # Examples must be in clean tf-example format.
        coder = taxi.make_proto_coder(schema)
        raw_data = (
            raw_data
            | 'ToSerializedTFExample' >> beam.Map(coder.encode))

        _ = (raw_data
             | 'ExtractEvaluateAndWriteResults' >>
             tfma.ExtractEvaluateAndWriteResults(
                 eval_shared_model=eval_shared_model,
                 slice_spec=slice_spec,
                 output_path=output_path,
                 display_only_data_location=input_csv))

    # The pipeline has finished once the `with` block exits, so the results
    # can be read back from disk.
    return tfma.load_eval_result(output_path=output_path)
    
print('Done')

#### You can also compute metrics on slices of your data in TFMA. Slices can be specified using ``tfma.slicer.SingleSliceSpec``.

Below are examples of how slices can be specified.

In [0]:
# No columns and no feature values: the overall slice, i.e. the whole dataset.
OVERALL_SLICE_SPEC = tfma.slicer.SingleSliceSpec()

# Slice along a single feature column.
# Here one slice is produced per value of trip_start_hour.
FEATURE_COLUMN_SLICE_SPEC = tfma.slicer.SingleSliceSpec(
    columns=['trip_start_hour'])

# Slice by a cross of feature columns.
# Here slices are computed for trip_start_day x trip_start_month.
FEATURE_COLUMN_CROSS_SPEC = tfma.slicer.SingleSliceSpec(
    columns=['trip_start_day', 'trip_start_month'])

# Compute metrics for one particular feature value.
# Here metrics are computed over all data where trip_start_hour is 12.
FEATURE_VALUE_SPEC = tfma.slicer.SingleSliceSpec(
    features=[('trip_start_hour', 12)])

# Column crosses and feature values can be combined.
# Here the data where trip_start_hour is 12 is sliced by trip_start_day.
COLUMN_CROSS_VALUE_SPEC = tfma.slicer.SingleSliceSpec(
    columns=['trip_start_day'],
    features=[('trip_start_hour', 12)])

# Every spec above, in the order they are passed to TFMA.
ALL_SPECS = [
    OVERALL_SLICE_SPEC,
    FEATURE_COLUMN_SLICE_SPEC,
    FEATURE_COLUMN_CROSS_SPEC,
    FEATURE_VALUE_SPEC,
    COLUMN_CROSS_VALUE_SPEC,
]

#### Let's run TFMA!

In [0]:
tf.logging.set_verbosity(tf.logging.INFO)

tfma_result_1 = run_tfma(input_csv=os.path.join(EVAL_DATA_DIR, 'data.csv'), 
                         tf_run_id=0, 
                         tfma_run_id=1,
                         slice_spec=ALL_SPECS,
                         schema_file=get_schema_file())
print('Done')


## Visualization: Slicing Metrics

To see the slices, either use the name of the column (by setting slicing_column) or provide a tfma.slicer.SingleSliceSpec (by setting slicing_spec). If neither is provided, the overall will be displayed.

The default visualization is the **slice overview** when the number of slices is small. It shows the value of a metric for each slice, sorted by another metric. It is also possible to set a threshold to filter out slices with smaller weights.

This view also supports the **metrics histogram** as an alternative visualization. It is also the default view when the number of slices is large. The results will be divided into buckets and the number of slices / total weights / both can be visualized. Slices with small weights can be filtered out by setting the threshold. Further filtering can be applied by dragging the grey band. To reset the range, double click the band. Filtering can be used to remove outliers in the visualization and the metrics table below.

In [0]:
# Show data sliced along feature column trip_start_hour.
tfma.view.render_slicing_metrics(
    tfma_result_1, slicing_column='trip_start_hour')

In [0]:
# Show metrics sliced by COLUMN_CROSS_VALUE_SPEC above.
tfma.view.render_slicing_metrics(tfma_result_1, slicing_spec=COLUMN_CROSS_VALUE_SPEC)

In [0]:
# Show overall metrics.
tfma.view.render_slicing_metrics(tfma_result_1)

## Visualization: Plots

TFMA offers a number of built-in plots. To see them, add them to ``add_metrics_callbacks``

In [0]:
tf.logging.set_verbosity(tf.logging.INFO)

tfma_vis = run_tfma(input_csv=os.path.join(EVAL_DATA_DIR, 'data.csv'), 
                    tf_run_id=0,
                    tfma_run_id='vis',
                    slice_spec=ALL_SPECS,
                    schema_file=get_schema_file(),
                    add_metrics_callbacks=[
                        # calibration_plot_and_prediction_histogram computes calibration plot and prediction
                        # distribution at different thresholds.
                        tfma.post_export_metrics.calibration_plot_and_prediction_histogram(),
                        # auc_plots enables precision-recall curve and ROC visualization at different thresholds.
                        tfma.post_export_metrics.auc_plots()
                    ])

print('Done')

Plots must be visualized for an individual slice. To specify a slice, use ``tfma.slicer.SingleSliceSpec``.

In the example below, we are using ``tfma.slicer.SingleSliceSpec(features=[('trip_start_hour', 1)])`` to specify the slice where trip_start_hour is 1.

Plots are interactive:
- Drag to pan
- Scroll to zoom
- Right click to reset the view

Simply hover over the desired data point to see more details.

In [0]:
tfma.view.render_plot(tfma_vis, tfma.slicer.SingleSliceSpec(features=[('trip_start_hour', 1)]))

#### Custom metrics

In addition to plots, it is also possible to compute additional metrics not present at export time, or custom metrics, using ``add_metrics_callbacks``.

All metrics in ``tf.metrics`` are supported in the callback and can be used to compose other metrics:
https://www.tensorflow.org/api_docs/python/tf/metrics

In the cells below, false negative rate is computed as an example.

In [0]:
# Factory for a callback that adds the false negative rate (FNR) to the result.
def add_fnr_for_threshold(threshold):
    """Create a metrics callback that reports FNR at the given threshold.

    Args:
      threshold: The prediction threshold handed to the
        ``tf.metrics.*_at_thresholds`` ops.

    Returns:
      A callback taking (features_dict, predictions_dict, labels_dict) and
      returning a dict with one entry, keyed ``'FNR@<threshold>'``, whose
      value is a (value_op, update_op) pair.
    """
    metric_name = 'FNR@' + str(threshold)

    def _add_fnr_callback(features_dict, predictions_dict, labels_dict):
        logistic = predictions_dict.get(tf.contrib.learn.PredictionKey.LOGISTIC)
        labels = tf.squeeze(labels_dict)
        predictions = tf.squeeze(tf.cast(logistic, tf.float64))
        thresholds = [threshold]

        false_neg, false_neg_update = tf.metrics.false_negatives_at_thresholds(
            labels, predictions, thresholds)
        true_pos, true_pos_update = tf.metrics.true_positives_at_thresholds(
            labels, predictions, thresholds)

        # FNR = FN / (FN + TP); index 0 selects the single threshold.
        fnr = false_neg[0] / (false_neg[0] + true_pos[0])
        return {metric_name: (fnr, tf.group(false_neg_update, true_pos_update))}

    return _add_fnr_callback

In [0]:
tf.logging.set_verbosity(tf.logging.INFO)

tfma_fnr = run_tfma(input_csv=os.path.join(EVAL_DATA_DIR, 'data.csv'), 
                    tf_run_id=0,
                    tfma_run_id='fnr',
                    slice_spec=ALL_SPECS,
                    schema_file=get_schema_file(),
                    add_metrics_callbacks=[
                        # Simply add the call here.
                        add_fnr_for_threshold(0.75)
                    ])
tfma.view.render_slicing_metrics(tfma_fnr, slicing_spec=FEATURE_COLUMN_SLICE_SPEC)

## Visualization: Time Series

It is important to track how your model is doing over time. TFMA offers two modes to show how your model performs over time.

**Multiple model analysis** shows how a model performs from one version to another. This is useful early on to see how the addition of new features, a change in modeling technique, etc., affects the performance. TFMA offers a convenient method.

In [0]:
help(tfma.multiple_model_analysis)

**Multiple data analysis** shows how a model performs on different evaluation data sets. This is useful to ensure that model performance does not degrade over time. TFMA offers a convenient method.

In [0]:
help(tfma.multiple_data_analysis)

It is also possible to compose a time series manually.

In [0]:
# Create different models.

# Run some experiments with different hidden layer configurations.
run_local_experiment(tft_run_id=0,
                     tf_run_id=1,
                     num_layers=3,
                     first_layer_size=200,
                     scale_factor=0.7)

run_local_experiment(tft_run_id=0,
                     tf_run_id=2,
                     num_layers=4,
                     first_layer_size=240,
                     scale_factor=0.5)

print('Done')

In [0]:
tfma_result_2 = run_tfma(input_csv=os.path.join(EVAL_DATA_DIR, 'data.csv'), 
                         tf_run_id=1, 
                         tfma_run_id=2,                         
                         slice_spec=ALL_SPECS,
                         schema_file=get_schema_file())

tfma_result_3 = run_tfma(input_csv=os.path.join(EVAL_DATA_DIR, 'data.csv'), 
                         tf_run_id=2, 
                         tfma_run_id=3,
                         slice_spec=ALL_SPECS,
                         schema_file=get_schema_file())
print('Done')

Like plots, the time series view must be visualized for a single slice too.

In the example below, we are showing the overall slice.

Select a metric to see its time series graph. Hover over each data point to get more details.

In [0]:
eval_results = tfma.make_eval_results([tfma_result_1, tfma_result_2, tfma_result_3], 
                                      tfma.constants.MODEL_CENTRIC_MODE)
tfma.view.render_time_series(eval_results, OVERALL_SLICE_SPEC)


Serialized results can also be used to construct a time series. Thus, there is no need to re-run TFMA for models already evaluated for a long running pipeline.

In [0]:
# Visualize the results in a Time Series. In this case, we are showing the slice specified.
eval_results_from_disk = tfma.load_eval_results([get_tfma_output_dir(1), 
                                                 get_tfma_output_dir(2), 
                                                 get_tfma_output_dir(3)], 
                                                tfma.constants.MODEL_CENTRIC_MODE)
tfma.view.render_time_series(eval_results_from_disk, FEATURE_VALUE_SPEC)