In [1]:
# Enable the autoreload extension so that edits to imported local modules
# are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import tensorflow as tf
from tensorflow.estimator import RunConfig
from training_functions import make_tfr_input_fn, input_layer

# Using a Tensorflow Estimator
As you can see, the ```Estimator``` is the central workhorse of ML Engineering. We'll have to provide it with:
- a model function: The model function creates appropriate versions of the hypothesis together with some parameters and the tools to evaluate and train the model.
    - The model function returns ```EstimatorSpec```s for the different phases of the ML lifecycle
- ```EvalSpec``` and ```TrainSpec``` objects that determine the physical characteristics of the training and evaluation phases.
- a ```RunConfig``` that essentially describes the execution environment.

After that, the estimator performs all steps independently: it creates log files, checkpoints, and performance metrics, and manages the entire model update life cycle. At the end, we have a model that we can use for prediction.

<img src="../images/TF_programming_model.png" style="width: 700px"/>

### Configuration

In [4]:
import os

# The working directory is read from a small text file (presumably written by
# an earlier notebook -- verify). Surrounding whitespace is stripped so that a
# trailing newline in that file does not end up inside every derived path.
with open('temp_dir.txt') as file:
    temp_dir = file.read().strip()

# Glob patterns for the training and evaluation data files.
training_pattern = os.path.join(temp_dir, "training.tfr-*")
eval_pattern = os.path.join(temp_dir, "eval.tfr-*")

# Kept for backward compatibility; identical to training_pattern.
file_pattern = training_pattern

# remove this directory to start from scratch
model_dir = os.path.join(temp_dir, "models")

In [5]:
# Shell check: if the directory in model_dir already exists, print the question below.
![ -d $model_dir ] && echo "Really delete $model_dir?"

In [6]:
# Uncomment if you really want to delete the model and start from scratch
#!rm -rf $model_dir

In [7]:
# Execution settings for the estimator; every interval is counted in steps.
config = RunConfig(
    model_dir=model_dir,
    save_summary_steps=1,
    save_checkpoints_steps=100,  # an evaluation is triggered after every checkpoint
    log_step_count_steps=10,
)

In [8]:
# Options passed to make_tfr_input_fn. All three phases use the same settings,
# so they are derived from one base dict instead of three copy-pasted literals.
# Each phase still gets its own dict object and can be tuned independently.
_default_input_options = {
    'num_epochs': None,  # repeat infinitely
    'shuffle_buffer_size': 1000,
    'prefetch_buffer_size': 1000,
    'reader_num_threads': 10,
    'parser_num_threads': 10,
    'sloppy_ordering': True,
    'distribute': False,
}

training_options = dict(_default_input_options)
eval_options = dict(_default_input_options)
test_options = dict(_default_input_options)

### Input Functions

In [9]:
# Input function that feeds the training files to the estimator.
train_input_fn = make_tfr_input_fn(
    filename_pattern=training_pattern, batch_size=1000, options=training_options)

# Same construction for the evaluation files.
eval_input_fn = make_tfr_input_fn(
    filename_pattern=eval_pattern, batch_size=1000, options=eval_options)

### The model_function
The model function provides ```EstimatorSpec```s, i.e. specifications of how to build the model for each of the different cases: training, evaluation and test. Indeed, some models require the actual function to differ slightly between training and evaluation. The model function is the place to specify exactly what is to be calculated during each phase of the ML process. In our case, though, all specifications are essentially the same. Typically, you'd expect the *data scientist* to provide this function, so it's not so important that you fully understand the concept here.

In [10]:
from training_functions import input_layer

def model_function(features, labels, mode):
    """Build the linear-regression graph and wrap it in an ``EstimatorSpec``.

    Args:
        features: feature tensors; handed to ``input_layer`` unchanged.
        labels: target values; only used to compute the loss, so they are
            not touched in PREDICT mode.
        mode: one of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec`` carrying the predictions (PREDICT),
        the loss (EVAL), or the loss plus the training op (any other mode,
        i.e. TRAIN).
    """
    mode_keys = tf.estimator.ModeKeys

    # The hypothesis: one dense unit on top of the input layer.
    dense_inputs = input_layer(features)
    regression_layer = tf.layers.Dense(units=1, name="LinReg")
    predictions = regression_layer(dense_inputs)

    # Prediction needs nothing beyond the hypothesis itself.
    if mode == mode_keys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode_keys.PREDICT, predictions=predictions)

    # Evaluation additionally reports the loss.
    loss = tf.losses.mean_squared_error(labels, predictions)
    if mode == mode_keys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode_keys.EVAL, loss=loss)

    # Training additionally needs the optimizer step, which also advances
    # the global step.
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-0)
    global_step = tf.train.get_or_create_global_step()
    train_op = optimizer.minimize(loss, global_step=global_step)
    return tf.estimator.EstimatorSpec(
        mode=mode_keys.TRAIN, loss=loss, train_op=train_op)


### Serving Input Receiver
This function returns a function that is going to be called by the estimator to create a ServingInputReceiver. Sounds odd, but is pretty straightforward. First, we provide a function that will return a tensor. We don't provide the tensor, because the tensor will have to be created in the context (graph and session) of the estimator methods. We use a function to create a function because we're passing a parameter that's necessary but not available to the estimator at runtime. Fine. But why are we doing that, anyway?

Remember the scaling of the $\beta$s that we performed with our Beam pipeline. We saved the transform function as the last step of the pipeline. Here, we dig it out again and provide it to the estimator so it can attach it to the front of its computational graph such that the same scaling is applied to the *signature* data.

In [14]:
import tensorflow_transform as tft
def make_tft_serving_input_fn(metadata_dir):
    """Create the serving input receiver function used when exporting the model.

    Args:
        metadata_dir: directory passed to ``tft.TFTransformOutput`` as
            ``transform_output_dir``.

    Returns:
        A zero-argument function that builds a ``ServingInputReceiver``.
        Its receiver tensors are the raw placeholders, and its features are
        those placeholders passed through ``transform_raw_features``.
    """
    # This is what signature data looks like: no feature cross yet.
    raw_input_dtypes = [
        ('beta1', tf.float32),
        ('beta2', tf.float32),
        ('weekday', tf.int64),
        ('hour', tf.int64),
    ]

    def _serving_input_receiver_fn():
        # The placeholders are created here, at call time, so that they live
        # in whatever graph is active when this function is invoked.
        receiver_tensors = {
            name: tf.placeholder(name=name, shape=[None, 1], dtype=dtype)
            for name, dtype in raw_input_dtypes
        }

        transform_output = tft.TFTransformOutput(transform_output_dir=metadata_dir)
        transformed = transform_output.transform_raw_features(receiver_tensors)

        return tf.estimator.export.ServingInputReceiver(transformed, receiver_tensors)

    return _serving_input_receiver_fn


### The Estimator

In [11]:
# Bind the model function and the run configuration into an estimator.
estimator = tf.estimator.Estimator(model_fn=model_function, config=config)

INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\wgi\\AppData\\Local\\Temp\\tmp_61u0qur\\models', '_tf_random_seed': None, '_save_summary_steps': 1, '_save_checkpoints_steps': 100, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 10, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002A4A06D3710>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


Here, we create the exporter that will also save the serving input function such that we can use our saved model with signature stage data that is not yet scaled, one-hot encoded and feature-crossed. The serving input function will take care of taking any pre-processing step into account at prediction time. 

In [12]:
# Directory handed to the serving input function; list it to see what it contains.
metadata_dir = os.path.join(temp_dir, 'metadata')
!ls $metadata_dir

transform_fn
transformed_metadata


In [15]:
# The exporter is given the serving input function built from the metadata directory.
serving_input_fn = make_tft_serving_input_fn(metadata_dir=metadata_dir)
exporter = tf.estimator.LatestExporter(
    name='exporter', serving_input_receiver_fn=serving_input_fn)

## At last: Let the estimator train our model

In [16]:
# Number of training steps = records * epochs / batch size.
# Integer division keeps the step count an int; true division produced 200.0.
NUM_TRAINING_RECORDS = 20000  # total number of records
NUM_TRAINING_EPOCHS = 10      # number of epochs I want for training
TRAINING_BATCH_SIZE = 1000    # batch size

max_steps = NUM_TRAINING_RECORDS * NUM_TRAINING_EPOCHS // TRAINING_BATCH_SIZE

In [17]:
# Training: where the data comes from and the step limit.
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=max_steps)

# Evaluation: data source, exporter, and scheduling.
eval_spec = tf.estimator.EvalSpec(
    input_fn=eval_input_fn,
    steps=2,  # 2 batches for evaluation
    exporters=exporter,
    # technical stuff - don't bother
    throttle_secs=2,
    start_delay_secs=0,
)



In [18]:
# Run the training/evaluation loop. The call's return value is this cell's
# output: the final evaluation metrics together with the export path(s).
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps 100 or save_checkpoints_secs None.
Instructions for updating:
Colocations handled automatically by placer.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Instru

({'loss': 3.5058045, 'global_step': 200},
 [b'C:\\Users\\wgi\\AppData\\Local\\Temp\\tmp_61u0qur\\models\\export\\exporter\\1558881491'])