# DNN estimator

In [1]:
# filter out future warnings from numpy on tensorflow calls
import os
import warnings  
import numpy as np

with warnings.catch_warnings():  
    warnings.filterwarnings("ignore",category=FutureWarning)
    import tensorflow as tf
    from tensorflow.compat.v1.estimator import DNNClassifier

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from datetime import datetime




## Dataset loading, splitting, standardization

In [2]:
dataset = fetch_openml('mnist_784', version=1)
n_classes = len(set(dataset.target))

In [3]:
train_data, test_data, train_target, test_target = train_test_split(dataset.data, dataset.target)

In [4]:
# standardization trained on train, reused on test dataset
scaler = StandardScaler()
X_train = scaler.fit_transform(train_data)
X_test = scaler.transform(test_data)

In [5]:
# string to class index
class_index_map = { k: v for v, k in enumerate(sorted(list(set(dataset.target))))}
index_class_map = { v: k for k,v in class_index_map.items() }

class_index_mapping = np.vectorize(lambda x: class_index_map[x])

In [6]:
y_train = class_index_mapping(train_target).reshape(-1,1)
y_test = class_index_mapping(test_target).reshape(-1,1)

## MLP Training: canned estimator

In [7]:
batch_size=50
max_steps=40000
save_checkpoints_steps=2000
log_step_count_steps=500

In [8]:
feature_columns = [ tf.feature_column.numeric_column(key='f1', shape=X_train[0].shape) ]

# this seed is used only for initialization
# batch is still random with no chance to set the seed
# see: https://stackoverflow.com/questions/47009560/tf-estimator-shuffle-random-seed
config = tf.estimator.RunConfig(tf_random_seed=42,
                                model_dir=os.path.join('tmp',
                                                       datetime.utcnow().strftime('%Y%m%d-%H%M%S')),
                                save_checkpoints_steps=save_checkpoints_steps,
                                log_step_count_steps=log_step_count_steps)

estimator = DNNClassifier(feature_columns=feature_columns,
                          hidden_units = [300,100],
                          n_classes = n_classes,
                          config=config
                         )

train_spec = tf.estimator.TrainSpec(input_fn=tf.estimator.inputs.numpy_input_fn(x = {'f1' : X_train},
                                                                                y = y_train, 
                                                                                batch_size=batch_size, 
                                                                                num_epochs=None,
                                                                                shuffle=True),
                                    max_steps=max_steps)
                                    
eval_spec = tf.estimator.EvalSpec(input_fn=tf.estimator.inputs.numpy_input_fn(x = {'f1' : X_test},
                                                                              y = y_test, 
                                                                              num_epochs=None,
                                                                              shuffle=False))

tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

INFO:tensorflow:Using config: {'_device_fn': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f9e3e8afe10>, '_save_checkpoints_secs': None, '_num_ps_replicas': 0, '_experimental_max_worker_delay_secs': None, '_experimental_distribute': None, '_num_worker_replicas': 1, '_task_id': 0, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_tf_random_seed': 42, '_train_distribute': None, '_master': '', '_model_dir': 'tmp/20190906-102353', '_log_step_count_steps': 500, '_protocol': None, '_eval_distribute': None, '_task_type': 'worker', '_save_checkpoints_steps': 2000, '_service': None, '_evaluation_master': '', '_keep_checkpoint_every_n_hours': 10000, '_is_chief': True, '_save_summary_steps': 100, '_keep_checkpoint_max': 5, '_global_id_in_cluster': 0}
INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:te

INFO:tensorflow:Saving checkpoints for 10000 into tmp/20190906-102353/model.ckpt.
Instructions for updating:
Use standard file APIs to delete files with this prefix.
INFO:tensorflow:Skip the current checkpoint eval due to throttle secs (600 secs).
INFO:tensorflow:global_step/sec: 290.523
INFO:tensorflow:loss = 0.3180346, step = 10001 (1.722 sec)
INFO:tensorflow:global_step/sec: 322.292
INFO:tensorflow:loss = 0.07306342, step = 10501 (1.551 sec)
INFO:tensorflow:global_step/sec: 329.55
INFO:tensorflow:loss = 0.124273896, step = 11001 (1.517 sec)
INFO:tensorflow:global_step/sec: 333.836
INFO:tensorflow:loss = 0.030183353, step = 11501 (1.498 sec)
INFO:tensorflow:Saving checkpoints for 12000 into tmp/20190906-102353/model.ckpt.
INFO:tensorflow:Skip the current checkpoint eval due to throttle secs (600 secs).
INFO:tensorflow:global_step/sec: 315.847
INFO:tensorflow:loss = 0.10849595, step = 12001 (1.583 sec)
INFO:tensorflow:global_step/sec: 333.244
INFO:tensorflow:loss = 0.060689114, step =

INFO:tensorflow:global_step/sec: 317.592
INFO:tensorflow:loss = 0.0018191895, step = 37501 (1.575 sec)
INFO:tensorflow:Saving checkpoints for 38000 into tmp/20190906-102353/model.ckpt.
INFO:tensorflow:Skip the current checkpoint eval due to throttle secs (600 secs).
INFO:tensorflow:global_step/sec: 306.652
INFO:tensorflow:loss = 0.027644545, step = 38001 (1.630 sec)
INFO:tensorflow:global_step/sec: 316.175
INFO:tensorflow:loss = 0.004091997, step = 38501 (1.582 sec)
INFO:tensorflow:global_step/sec: 333.487
INFO:tensorflow:loss = 0.011268737, step = 39001 (1.499 sec)
INFO:tensorflow:global_step/sec: 317.282
INFO:tensorflow:loss = 0.0062669525, step = 39501 (1.576 sec)
INFO:tensorflow:Saving checkpoints for 40000 into tmp/20190906-102353/model.ckpt.
INFO:tensorflow:Skip the current checkpoint eval due to throttle secs (600 secs).
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-09-06T12:26:03Z
INFO:tensorflow:Graph was f

({'accuracy': 0.9758594,
  'average_loss': 0.15713948,
  'global_step': 40000,
  'loss': 20.113853},
 [])