In [1]:
import tensorflow as tf
import shutil
# Show the installed TensorFlow version; later cells call TF 1.x APIs
# such as tf.layers and tf.contrib.
print(tf.__version__)

1.15.0


In [14]:
# Key under which the pixel vector travels in every features dict.
FEATURES_COL = "x"
# Number of pixel columns per CSV row (28 x 28 values, flattened).
FEATURES_LEN = 28 ** 2
# Class names in index order; position i is the string label for class id i.
LIST_OF_LABELS = [str(digit) for digit in range(10)]
NCLASSES = len(LIST_OF_LABELS)
# Per-column CSV defaults: column 0 is the integer label, every other column
# is a float pixel value.
DEFAULTS = [[0.0] if column else [0] for column in range(FEATURES_LEN + 1)]

In [4]:
def linear_model(features, mode, params):
    """Map the input features straight to class logits with one dense layer.

    Args:
        features: dict holding the input tensor under FEATURES_COL.
        mode: accepted so all model functions share a signature; unused here.
        params: accepted so all model functions share a signature; unused here.

    Returns:
        Tuple (logits, NCLASSES), where logits is the output of a dense layer
        with NCLASSES units and no activation.
    """
    inputs = features[FEATURES_COL]
    logits = tf.layers.dense(inputs = inputs, units = NCLASSES, activation = None)
    return logits, NCLASSES

In [109]:
def dnn_model(features, mode, hparams):
    """Three ReLU hidden layers (300, 100, 30 units) followed by a logits layer.

    Args:
        features: dict holding the input tensor under FEATURES_COL.
        mode: accepted so all model functions share a signature; unused here.
        hparams: accepted so all model functions share a signature; unused here.

    Returns:
        Tuple (logits, NCLASSES); logits come from a final dense layer with
        NCLASSES units and no activation.
    """
    activations = features[FEATURES_COL]
    # Layers are created in the same order as listed, widest first.
    for width in (300, 100, 30):
        activations = tf.layers.dense(inputs = activations, units = width, activation = tf.nn.relu)
    logits = tf.layers.dense(inputs = activations, units = NCLASSES, activation = None)
    return logits, NCLASSES

In [110]:
def dnn_dropout_model(features, mode, hparams):
    """Same hidden stack as the plain DNN, with dropout before the logits layer.

    Args:
        features: dict holding the input tensor under FEATURES_COL.
        mode: tf.estimator.ModeKeys value; dropout is active only for TRAIN.
        hparams: dict; optional key "dprob" sets the dropout rate (default 0.1).

    Returns:
        Tuple (logits, NCLASSES); logits come from a final dense layer with
        NCLASSES units and no activation.
    """
    drop_rate = hparams.get("dprob", 0.1)
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    hidden = features[FEATURES_COL]
    for width in (300, 100, 30):
        hidden = tf.layers.dense(inputs = hidden, units = width, activation = tf.nn.relu)
    # Dropout is applied to the last hidden layer only, and only while training.
    dropped = tf.layers.dropout(inputs = hidden, rate = drop_rate, training = is_training)
    logits = tf.layers.dense(inputs = dropped, units = NCLASSES, activation = None)
    return logits, NCLASSES

In [111]:
def read_dataset(filename, mode, batch_size=512):
    """Build an Estimator input function that streams examples from CSV files.

    Args:
        filename: path or glob pattern passed to tf.data.Dataset.list_files.
        mode: tf.estimator.ModeKeys value. TRAIN shuffles and repeats
            indefinitely; any other mode makes exactly one pass.
        batch_size: number of examples per batch.

    Returns:
        A zero-argument function that builds the pipeline and returns the
        (features, label) tensors of a one-shot iterator, where features is a
        dict with the stacked pixel tensor under FEATURES_COL.
    """
    def _parse_row(row):
        # One CSV line -> FEATURES_LEN + 1 scalar tensors, typed by DEFAULTS
        # (an int column followed by float columns).
        columns = tf.decode_csv(row, record_defaults=DEFAULTS)
        # The FIRST column is the label; the remaining columns are the pixels.
        label, pixels = columns[0], columns[1:]
        # Stack the scalars into one rank-1 tensor and divide by 255.
        return {FEATURES_COL: tf.stack(pixels) / 255}, label

    def _read_lines(path):
        # skip(1) drops the first line of each file (presumably a header row).
        return tf.data.TextLineDataset(path).skip(1)

    def _input_fn():
        # Matching files -> text lines -> parsed (features, label) pairs.
        dataset = tf.data.Dataset.list_files(filename)
        dataset = dataset.flat_map(_read_lines)
        dataset = dataset.map(_parse_row)

        if mode == tf.estimator.ModeKeys.TRAIN:
            dataset = dataset.shuffle(buffer_size=10 * batch_size)
            num_epochs = None  # repeat forever; the training step limit ends the run
        else:
            num_epochs = 1  # single pass, then end-of-input

        batched = dataset.repeat(num_epochs).batch(batch_size)
        return batched.make_one_shot_iterator().get_next()

    return _input_fn

In [112]:
def serving_input_fn():
    """Describe the tensors the exported model accepts at prediction time.

    Returns:
        tf.estimator.export.ServingInputReceiver whose single input is a
        float32 placeholder of shape [None, FEATURES_LEN] keyed by
        FEATURES_COL. The same tensors are handed to the model as features.
    """
    feature_placeholders = {
        FEATURES_COL: tf.placeholder(tf.float32, [None, FEATURES_LEN])
    }

    # The placeholder already has the [batch, FEATURES_LEN] shape the model
    # functions consume, so it is passed through unchanged. (The previous
    # expand_dims(-1) followed by squeeze(axis=[2]) was a no-op round trip.)
    # NOTE(review): read_dataset divides pixels by 255 for training/eval, but
    # no scaling happens here, so requests presumably must carry pre-scaled
    # values - confirm against data/test.json.
    features = dict(feature_placeholders)

    return tf.estimator.export.ServingInputReceiver(features, feature_placeholders)

In [113]:
def classifier(features, labels, mode, params):
    """Estimator model_fn: build the chosen network and its loss/train/metric ops.

    Args:
        features: dict of input tensors, forwarded to the selected model function.
        labels: class indices that are one-hot encoded for the loss; not
            read in PREDICT mode.
        mode: tf.estimator.ModeKeys value.
        params: dict; "model" selects "linear", "dnn" or "dnn_dropout", and
            "learning_rate" is read when training. The whole dict is also
            forwarded to the selected model function.

    Returns:
        tf.estimator.EstimatorSpec with predictions "probabilities", "classid"
        and "class", plus loss / train_op / eval_metric_ops where the mode
        calls for them, and a "classes" export output with the same predictions.

    Raises:
        KeyError: if params["model"] is not one of the known model names.
    """
    mode_keys = tf.estimator.ModeKeys
    build_network = {
        "linear": linear_model,
        "dnn": dnn_model,
        "dnn_dropout": dnn_dropout_model,
    }[params["model"]]
    logits, _ = build_network(features, mode, params)

    # Prediction tensors are built in every mode.
    probs = tf.nn.softmax(logits = logits)
    predicted_id = tf.cast(x = tf.argmax(input = logits, axis = 1), dtype = tf.uint8)
    predicted_label = tf.gather(
        params = LIST_OF_LABELS,
        indices = tf.cast(x = predicted_id, dtype = tf.int32))

    # PREDICT keeps all three as None; TRAIN and EVAL fill in what they need.
    loss = None
    train_op = None
    eval_metric_ops = None

    if mode in (mode_keys.TRAIN, mode_keys.EVAL):
        one_hot_labels = tf.one_hot(indices = labels, depth = NCLASSES)
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits = logits, labels = one_hot_labels)
        loss = tf.reduce_mean(input_tensor = per_example_loss)

        if mode == mode_keys.TRAIN:
            # Run any registered UPDATE_OPS before the training step (needed
            # for batch normalization; harmless when the collection is empty).
            pending_updates = tf.get_collection(key = tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(control_inputs = pending_updates):
                train_op = tf.contrib.layers.optimize_loss(
                    loss = loss,
                    global_step = tf.train.get_global_step(),
                    learning_rate = params["learning_rate"],
                    optimizer = "Adam")
        else:
            eval_metric_ops = {
                "accuracy": tf.metrics.accuracy(labels = labels, predictions = predicted_id)
            }

    predictions = {
        "probabilities": probs,
        "classid": predicted_id,
        "class": predicted_label,
    }
    return tf.estimator.EstimatorSpec(
        mode = mode,
        predictions = predictions,
        loss = loss,
        train_op = train_op,
        eval_metric_ops = eval_metric_ops,
        export_outputs = {
            "classes": tf.estimator.export.PredictOutput(dict(predictions))
        }
    )

In [114]:
def train_and_evaluate(output_dir, hparams):
    """Train the custom Estimator, evaluating and exporting a SavedModel as it goes.

    Args:
        output_dir: model directory handed to the Estimator.
        hparams: dict; 'train_data_path', 'eval_data_path', 'batch_size' and
            'train_steps' are read here, and the whole dict is passed to the
            model function as params.
    """
    EVAL_INTERVAL = 300  # seconds (5 minutes); used for checkpointing and eval throttling

    # Clear cached event-file writers so TensorBoard events start fresh.
    tf.summary.FileWriterCache.clear()

    # Estimator around the custom model function; model_dir is an Estimator
    # argument (not a RunConfig one).
    run_config = tf.estimator.RunConfig(save_checkpoints_secs = EVAL_INTERVAL)
    estimator = tf.estimator.Estimator(
        model_fn = classifier,
        params = hparams,
        config = run_config,
        model_dir = output_dir)

    # Training: read the training files until max_steps is reached.
    train_input_fn = read_dataset(
        hparams['train_data_path'],
        tf.estimator.ModeKeys.TRAIN,
        hparams['batch_size'])
    train_spec = tf.estimator.TrainSpec(
        input_fn = train_input_fn,
        max_steps = hparams["train_steps"])

    # Exporter that writes a SavedModel using serving_input_fn.
    exporter = tf.estimator.LatestExporter(
        name = "exporter",
        serving_input_receiver_fn = serving_input_fn)

    # Evaluation: steps=None, with the exporter attached.
    eval_input_fn = read_dataset(
        hparams['eval_data_path'],
        tf.estimator.ModeKeys.EVAL,
        hparams['batch_size'])
    eval_spec = tf.estimator.EvalSpec(
        input_fn = eval_input_fn,
        steps = None,
        exporters = exporter,
        start_delay_secs = EVAL_INTERVAL,
        throttle_secs = EVAL_INTERVAL)

    # Run the combined train/evaluate loop.
    tf.estimator.train_and_evaluate(
        estimator = estimator,
        train_spec = train_spec,
        eval_spec = eval_spec)

In [115]:
# Model directory: passed to train_and_evaluate as output_dir and deleted
# before each training run.
OUTDIR = 'mnist_trained'

In [122]:
# Run configuration consumed by train_and_evaluate and, as `params`, by the
# model function. No "dprob" entry is set, so the dropout model falls back to
# its default rate of 0.1.
hparams = dict(
    output_dir = OUTDIR,
    train_steps = 300,                          # max_steps for training
    learning_rate = 0.01,                       # passed to the Adam optimizer
    batch_size = 128,                           # used for both train and eval input
    model = 'dnn_dropout',                      # one of: linear, dnn, dnn_dropout
    train_data_path = 'data/mnist_train.csv',
    eval_data_path = 'data/mnist_test.csv',
)

In [123]:
# Remove any previous model directory so the run starts from scratch; errors
# (e.g. the directory not existing yet) are ignored.
shutil.rmtree(OUTDIR, ignore_errors = True)

In [124]:
# Kick off training, evaluation and export with the hparams defined above,
# using OUTDIR as the model directory.
train_and_evaluate(OUTDIR, hparams)

INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_log_step_count_steps': 100, '_experimental_max_worker_delay_secs': None, '_experimental_distribute': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fcaac890f28>, '_evaluation_master': '', '_device_fn': None, '_master': '', '_is_chief': True, '_protocol': None, '_train_distribute': None, '_task_type': 'worker', '_save_checkpoints_steps': None, '_save_checkpoints_secs': 300, '_num_ps_replicas': 0, '_task_id': 0, '_global_id_in_cluster': 0, '_tf_random_seed': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_save_summary_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_worker_replicas': 1, '_session_creation_timeout_secs': 7200, '_eval_distribute': None, '_model_dir': 'mnist_trained'}
INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running training and evaluation

In [85]:
# List what the "exporter" LatestExporter wrote under the model directory.
!ls mnist_trained/export/exporter

1576031131


In [86]:
!gcloud ml-engine local predict \
--model-dir=mnist_trained/export/exporter/1576031131 --json-instances=data/test.json

If the signature defined in the model is not serving_default then you must specify it via --signature-name flag, otherwise the command may fail.






2019-12-11 02:26:14.293714: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2019-12-11 02:26:14.340325: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2019-12-11 02:26:14.341494: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 0 with properties: 
name: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.8235
pciBusID: 0000:00:04.0
2019-12-11 02:26:14.341561: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2019-12-11 02:26:14.342692: I tensorflow/core/common_runtime/gpu