# Build Experiment from TF Estimator

Embeds a 3-layer fully connected network (FCN) that predicts MNIST handwritten digits in a TensorFlow Experiment. The FCN model is built using the __DNNClassifier__ Estimator from the __tf.contrib.learn__ package.

In [1]:
from __future__ import division, print_function
import matplotlib.pyplot as plt
import numpy as np
import os
import shutil
import tensorflow as tf

In [2]:
# Directory holding the input CSV files (relative to the notebook's
# working directory); the model directory is created underneath it too.
DATA_DIR = "../../data"
TRAIN_FILE = os.path.join(DATA_DIR, "mnist_train.csv")
TEST_FILE = os.path.join(DATA_DIR, "mnist_test.csv")

# Model directory handed to the estimator / run config; it is deleted
# before each run further down in the notebook.
MODEL_DIR = os.path.join(DATA_DIR, "expt-tf-model")

# Width of the real-valued feature column, i.e. the number of feature
# values per example (presumably a flattened 28x28 image -- TODO confirm).
NUM_FEATURES = 784
# Number of target classes passed to the classifier.
NUM_CLASSES = 10
# Number of training steps used both for estimator.fit and the Experiment.
NUM_STEPS = 100

## Prepare Data

In [3]:
def parse_file(filename):
    """Read a comma-separated MNIST file into feature and label arrays.

    Every non-blank line must hold an integer label in its first column
    followed by the feature values; each feature value is divided by 255.
    A progress message is printed every 10000 parsed lines and once more
    when the whole file has been read.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a NumPy array built from the
        scaled feature rows and ``y`` is a NumPy array of the integer
        labels, both in file order.

    Raises:
        ValueError: if a label or feature value cannot be parsed as a number.
    """
    xdata, ydata = [], []
    name = os.path.basename(filename)
    i = 0
    # Text mode so that splitting on "," works on Python 3 as well as
    # Python 2; the with-block closes the handle even if parsing raises.
    with open(filename, "r") as fin:
        for line in fin:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. an extra newline at end of file)
                # instead of failing on int("").
                continue
            if i % 10000 == 0:
                print("{:s}: {:d} lines read".format(name, i))
            cols = line.split(",")
            ydata.append(int(cols[0]))
            xdata.append([float(x) / 255. for x in cols[1:]])
            i += 1
    print("{:s}: {:d} lines read".format(name, i))
    y = np.array(ydata)
    X = np.array(xdata)
    return X, y

# Parse both CSV files into (features, labels) array pairs, then report
# the shape of every resulting array on a single line.
(Xtrain, ytrain), (Xtest, ytest) = parse_file(TRAIN_FILE), parse_file(TEST_FILE)
print(*(arr.shape for arr in (Xtrain, ytrain, Xtest, ytest)))

mnist_train.csv: 0 lines read
mnist_train.csv: 10000 lines read
mnist_train.csv: 20000 lines read
mnist_train.csv: 30000 lines read
mnist_train.csv: 40000 lines read
mnist_train.csv: 50000 lines read
mnist_train.csv: 60000 lines read
mnist_test.csv: 0 lines read
mnist_test.csv: 10000 lines read
(60000, 784) (60000,) (10000, 784) (10000,)


In [4]:
# these functions are parameters to the classifier
def train_input_fn():
    """Return the training features and labels as a pair of constant tensors."""
    features = tf.constant(Xtrain)
    labels = tf.constant(ytrain)
    return features, labels

def test_input_fn():
    """Return the test features and labels as a pair of constant tensors."""
    features = tf.constant(Xtest)
    labels = tf.constant(ytest)
    return features, labels

## Define Estimator

The tf.contrib.learn package provides several Estimators out of the box, including:

* tf.contrib.learn.LinearRegressor
* tf.contrib.learn.LinearClassifier
* tf.contrib.learn.DNNRegressor
* tf.contrib.learn.DNNClassifier

In [5]:
# Remove any existing model directory (errors are ignored) before the
# estimator is created.
shutil.rmtree(MODEL_DIR, ignore_errors=True)

# A single real-valued feature column with an empty name and
# NUM_FEATURES dimensions.
feature_cols = [
    tf.contrib.layers.real_valued_column("", dimension=NUM_FEATURES),
]

# DNN classifier with two hidden layers (512 and 256 units) that writes
# its checkpoints to MODEL_DIR.
estimator = tf.contrib.learn.DNNClassifier(
    feature_columns=feature_cols,
    hidden_units=[512, 256],
    n_classes=NUM_CLASSES,
    model_dir=MODEL_DIR,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x135d23d10>, '_model_dir': '../../data/expt-tf-model', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_tf_random_seed': None, '_environment': 'local', '_num_worker_replicas': 0, '_task_id': 0, '_save_summary_steps': 100, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_evaluation_master': '', '_master': ''}


## Train Estimator

In [6]:
# Train the classifier for NUM_STEPS steps on the data supplied by
# train_input_fn. The call is left as the cell's last expression, so its
# return value (the DNNClassifier repr in the output) is displayed.
estimator.fit(input_fn=train_input_fn, steps=NUM_STEPS)

Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into ../../data/expt-tf-model/model.ckpt.
INFO:tensorflow:loss = 2.33514, step = 1
INFO:tensorflow:Saving checkpoints for 100 into ../../data/expt-tf-model/model.ckpt.
INFO:tensorflow:Loss for final step: 0.320789.


DNNClassifier(params={'head': <tensorflow.contrib.learn.python.learn.estimators.head._MultiClassHead object at 0x107d0b590>, 'hidden_units': [512, 256], 'feature_columns': (_RealValuedColumn(column_name='', dimension=784, default_value=None, dtype=tf.float32, normalizer=None),), 'embedding_lr_multipliers': None, 'optimizer': None, 'dropout': None, 'gradient_clip_norm': None, 'activation_fn': <function relu at 0x11061ff50>, 'input_layer_min_slice_size': None})

## Evaluate Estimator

In [7]:
# Run one evaluation step on the data from test_input_fn and keep only
# the "accuracy" entry of the returned metrics dict.
accuracy_score = estimator.evaluate(
    input_fn=test_input_fn,
    steps=1,
)["accuracy"]
print("accuracy: {:.3f}".format(accuracy_score))

Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
INFO:tensorflow:Starting evaluation at 2017-05-31-00:24:40
INFO:tensorflow:Restoring parameters from ../../data/expt-tf-model/model.ckpt-100
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2017-05-31-00:24:41
INFO:tensorflow:Saving dict for global step 100: accuracy = 0.917, global_step = 100, loss = 0.305549
accuracy: 0.917


## Alternatively...
## Define Experiment

A model is wrapped in an Estimator, which is then wrapped in an Experiment. Once you have an Experiment, you can run it in a distributed manner on CPU, GPU, or TPU.

In [8]:
def experiment_fn(run_config, params):
    """Build the Experiment that trains and evaluates the DNN classifier.

    Args:
        run_config: configuration object forwarded to the DNNClassifier
            as its ``config`` argument.
        params: not used by this function (presumably required by the
            runner's calling convention -- TODO confirm).

    Returns:
        A tf.contrib.learn.Experiment that trains the classifier for
        NUM_STEPS steps with train_input_fn and evaluates it with
        test_input_fn.
    """
    columns = [
        tf.contrib.layers.real_valued_column("", dimension=NUM_FEATURES),
    ]
    classifier = tf.contrib.learn.DNNClassifier(
        config=run_config,
        feature_columns=columns,
        hidden_units=[512, 256],
        n_classes=NUM_CLASSES,
    )
    experiment = tf.contrib.learn.Experiment(
        estimator=classifier,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=NUM_STEPS,
    )
    return experiment

## Run Experiment

In [9]:
# Remove any existing model directory (errors are ignored), then let the
# learn_runner build and run the Experiment with a RunConfig that points
# at MODEL_DIR. The call stays the cell's last expression so its result
# is displayed.
shutil.rmtree(MODEL_DIR, ignore_errors=True)
tf.contrib.learn.learn_runner.run(
    experiment_fn,
    run_config=tf.contrib.learn.RunConfig(model_dir=MODEL_DIR),
)

INFO:tensorflow:Using config: {'_model_dir': '../../data/expt-tf-model', '_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_tf_random_seed': None, '_task_type': None, '_environment': 'local', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x117f7ed50>, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_num_worker_replicas': 0, '_task_id': 0, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_evaluation_master': '', '_keep_checkpoint_every_n_hours': 10000, '_master': '', '_session_config': None}
Instructions for updating:
Monitors are deprecated. Please use tf.train.SessionRunHook.
Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summ

({'accuracy': 0.9152, 'global_step': 100, 'loss': 0.30302814}, [])