In [28]:
import argparse
import copy
import os
import shutil
import sys
import time

import pandas as pd
import tensorflow as tf

import dataset
#from models.conv2_dense2_dropout import Model
from models.dense3 import Model

from helpers.estimator_utils import create_model_fn
from helpers.history import ExpHistory
from helpers.os_utils import os_info

### Get the history and the runtime context 

In [51]:
# Show TensorFlow's INFO-level log lines in the notebook output.
tf.logging.set_verbosity(tf.logging.INFO)

# The experiment history is loaded from this CSV file.
HIST_FILE_NAME = 'experiment_history.csv'
history = ExpHistory(HIST_FILE_NAME)

# With no argument, asctime() formats the current local time.
localtime = time.asctime()
# Login name: USER if set, otherwise USERNAME, otherwise a placeholder.
if 'USER' in os.environ:
    user = os.environ['USER']
else:
    user = os.environ.get('USERNAME', 'anonymous')

print("\n\n")
greeting = "Welcome, %s, it's %s, and you'll be working with Tensorflow version %s"
print(greeting % (user, localtime, tf.__version__))

# Describe the machine this notebook is running on.
rt = os_info()
this_os, this_node, this_machine, this_cuda = (
    rt[key] for key in ('os', 'node', 'machine', 'cuda')
)
runtime_report = "Your current runtime: \n  node: %s, \n  os: %s, \n  machine: %s, \n  cuda: %s"
print(runtime_report % (this_node, this_os, this_machine, this_cuda))
print("\n")

# Columns shown in the overview below. 'os', 'machine', 'model_dir', 'user'
# and 'time_stamp' are deliberately left out to keep the table narrow.
columns = [
    'node',
    'cuda',
    'multi_gpu',
    'batch_size',
    'data_dir',
    'train_epochs',
    'localtime',
    'steps',
    'accuracy',
    'duration',
]

# Display the ten most recent experiments, restricted to the columns above.
recent_experiments = history.experiments.tail(10)
recent_experiments[columns]




Welcome, wgiersche, it's Tue May  1 15:31:42 2018, and you'll be working with Tensorflow version 1.8.0
Your current runtime: 
  node: wolfgangs-mac-pro.home, 
  os: Darwin-15.6.0-x86_64-i386-64bit, 
  machine: x86_64, 
  cuda: False




Unnamed: 0,node,cuda,multi_gpu,batch_size,data_dir,train_epochs,localtime,steps,accuracy,duration
15,scylla,1,0,256,/var/ellie/data/mnist_fashion,4,Tue May 1 12:53:26 2018,1880.0,0.8841,4.0
16,wolfgangs-mac-pro.home,0,0,256,/var/ellie/data/mnist_fashion,4,Tue May 1 13:01:05 2018,940.0,0.8726,12.0
17,wolfgangs-mac-pro.home,0,0,256,/var/ellie/data/mnist_fashion,12,Tue May 1 13:45:49 2018,2820.0,0.8947,31.0
18,wolfgangs-mac-pro.home,0,0,256,/var/ellie/data/mnist,12,Tue May 1 13:45:49 2018,2820.0,0.9797,30.0
19,wolfgangs-mac-pro.home,0,0,512,/var/ellie/data/mnist_fashion,12,Tue May 1 14:01:26 2018,1416.0,0.8848,26.0
20,wolfgangs-mac-pro.home,0,0,512,/var/ellie/data/mnist_fashion,12,Tue May 1 14:11:10 2018,1416.0,0.8879,26.0
21,wolfgangs-mac-pro.home,0,0,64,/var/ellie/data/mnist_fashion,2,Tue May 1 14:47:08 2018,1876.0,0.9038,146.0
22,wolfgangs-mac-pro.home,0,0,64,/var/ellie/data/mnist_fashion,2,Tue May 1 14:47:08 2018,3752.0,0.9088,143.0
23,wolfgangs-mac-pro.home,0,0,64,/var/ellie/data/mnist_fashion,2,Tue May 1 15:27:27 2018,1876.0,0.873,12.0
24,wolfgangs-mac-pro.home,0,0,64,/var/ellie/data/mnist_fashion,2,Tue May 1 15:28:29 2018,1876.0,0.8536,12.0


### Want to start with the most recent record from this platform?

In [41]:
# Ask the history for a suggested starting record.
hparams=history.suggest_from_history()
# NOTE(review): the suggestion above is replaced immediately by the explicit
# copy below, so only record 18 is used from here on. Presumably one of the
# two lines is meant to be commented out, depending on whether you want the
# suggestion or a specific record -- confirm that suggest_from_history() has
# no side effects before removing it.
hparams=history.copy_from_record(18)
# Display the chosen record.
hparams

node                     wolfgangs-mac-pro.home
os              Darwin-15.6.0-x86_64-i386-64bit
machine                                  x86_64
cuda                                          0
multi_gpu                                     0
batch_size                                  256
data_dir                  /var/ellie/data/mnist
model_dir                      /tmp/mnist_model
train_epochs                                 12
user                                  wgiersche
timestamp                            1525181309
localtime              Tue May  1 15:28:29 2018
accuracy                                 0.9797
steps                                      2820
duration                                     30
Name: 18, dtype: object

### Use as new hyper-parameter record, with adaptations 

In [42]:
# Name of the data set variant passed to the dataset helpers; it has to match
# the directory configured in hparams['data_dir'] below.
DATA_SET = 'FASHION'

# Override the copied record for this run.
# Item assignment is used on purpose: hparams is a pandas Series, and
# attribute assignment (hparams.x = ...) only updates labels that already
# exist. A missing or misspelled label would silently become a plain Python
# attribute and never reach the reported record.
hparams['data_dir'] = '/var/ellie/data/mnist_fashion'
hparams['train_epochs'] = 2
hparams['batch_size'] = 64
hparams['multi_gpu'] = False
hparams

node                     wolfgangs-mac-pro.home
os              Darwin-15.6.0-x86_64-i386-64bit
machine                                  x86_64
cuda                                          0
multi_gpu                                 False
batch_size                                   64
data_dir          /var/ellie/data/mnist_fashion
model_dir                      /tmp/mnist_model
train_epochs                                  2
user                                  wgiersche
timestamp                            1525181309
localtime              Tue May  1 15:28:29 2018
accuracy                                 0.9797
steps                                      2820
duration                                     30
Name: 18, dtype: object

# Get to work!

In [43]:
# For the sake of this tutorial, we always start from scratch.
# Remove the directory the Estimator is actually configured with
# (hparams.model_dir) instead of a hard-coded path: if the two differ, stale
# checkpoints would survive and training would resume from them.
# shutil.rmtree is also portable, unlike shelling out to `rm -rf`.
# ignore_errors=True keeps the old "no complaint if it does not exist" behavior.
# NOTE(review): model_dir comes from the experiment history; make sure it
# points at a scratch directory before running this cell.
shutil.rmtree(hparams.model_dir, ignore_errors=True)

### The model function constructs the computational graphs for training, eval and test


In [44]:
# Ingredients handed to create_model_fn: a factory that builds the network
# from the Estimator params, the optimizer, the loss function, and the
# hyper-parameter record.
optimizer = tf.train.AdamOptimizer()
loss_fn = tf.losses.sparse_softmax_cross_entropy

model_function = create_model_fn(lambda params: Model(params), optimizer, loss_fn, hparams)

Performance depends on the data format, and the best choice differs between CPU and GPU computation.

In [45]:
# Choose the tensor layout from how TensorFlow was built: channels-first for
# CUDA builds, channels-last otherwise.
if tf.test.is_built_with_cuda():
    data_format = 'channels_first'
else:
    data_format = 'channels_last'

### The Estimator is the centerpiece of TensorFlow's new API

In [46]:
# Parameters passed to the Estimator as `params`.
estimator_params = {
    'data_format': data_format,
    'multi_gpu': hparams.multi_gpu,
}

# The Estimator ties the model function to the directory where checkpoints
# and summaries are written.
mnist_classifier = tf.estimator.Estimator(
    model_fn=model_function,
    model_dir=hparams.model_dir,
    params=estimator_params)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/mnist_model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x11da327f0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


##### ```input_fn``` functions are factories for ```Dataset```s

### Input datasets for training and evaluation

In [47]:
def train_input_fn():
    """Build the training input pipeline.

    Reads the training split selected by DATA_SET from hparams.data_dir,
    caches it, shuffles with a 50000-element buffer, batches by
    hparams.batch_size and repeats for hparams.train_epochs epochs.

    Returns:
        The resulting dataset object.
    """
    examples = dataset.training_dataset(hparams.data_dir, DATA_SET)
    shuffled = examples.cache().shuffle(buffer_size=50000)
    batched = shuffled.batch(hparams.batch_size)
    return batched.repeat(hparams.train_epochs)

In [48]:
def eval_input_fn():
    """Build the evaluation input.

    Reads the test split selected by DATA_SET from hparams.data_dir, batches
    it by hparams.batch_size and makes a single pass over it.

    Returns:
        The get_next() result of a one-shot iterator over the batched test set.
    """
    test_batches = dataset.test_dataset(hparams.data_dir, DATA_SET).batch(hparams.batch_size)
    iterator = test_batches.make_one_shot_iterator()
    return iterator.get_next()

In [49]:
# Map the label shown in the log to the name of the tensor to log.
tensors_to_log = dict(train_accuracy='train_accuracy')

# Log the tensors above every 1000 training iterations.
logging_hook = tf.train.LoggingTensorHook(
    tensors=tensors_to_log,
    every_n_iter=1000,
)

### Run the training and report the new hyper-parameters 

In [50]:
# Train, measuring the wall-clock time of the training call only.
start_time = time.time()
mnist_classifier.train(input_fn=train_input_fn, hooks=[logging_hook])
duration = time.time() - start_time

# Evaluate on the test set.
eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)

# Record the outcome in the hyper-parameter record.
# Item assignment is used on purpose: hparams is a pandas Series, and
# attribute assignment (hparams.x = ...) only updates labels that already
# exist. A record without 'accuracy', 'steps' or 'duration' would otherwise
# be reported without the new results.
hparams['accuracy'] = eval_results['accuracy']
hparams['steps'] = eval_results['global_step']
hparams['duration'] = int(duration)  # whole seconds

# Report!
history.report_experiment(hparams)

print('Evaluation results:\n\t%s' % eval_results)
hparams

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/mnist_model/model.ckpt.
INFO:tensorflow:train_accuracy = 0.125
INFO:tensorflow:loss = 2.275834, step = 1
INFO:tensorflow:global_step/sec: 110.231
INFO:tensorflow:loss = 0.7123023, step = 101 (0.907 sec)
INFO:tensorflow:global_step/sec: 161.131
INFO:tensorflow:loss = 0.46541423, step = 201 (0.621 sec)
INFO:tensorflow:global_step/sec: 262.8
INFO:tensorflow:loss = 0.4890712, step = 301 (0.381 sec)
INFO:tensorflow:global_step/sec: 263.337
INFO:tensorflow:loss = 0.65269184, step = 401 (0.380 sec)
INFO:tensorflow:global_step/sec: 264.086
INFO:tensorflow:loss = 0.26734117, step = 501 (0.379 sec)
INFO:tensorflow:global_step/sec: 263.08
INFO:tensorflow:loss = 0.3595935, step = 601 (0.380 sec)
INFO:tensorflow:glo

node                     wolfgangs-mac-pro.home
os              Darwin-15.6.0-x86_64-i386-64bit
machine                                  x86_64
cuda                                          0
multi_gpu                                 False
batch_size                                   64
data_dir          /var/ellie/data/mnist_fashion
model_dir                      /tmp/mnist_model
train_epochs                                  2
user                                  wgiersche
timestamp                            1525181309
localtime              Tue May  1 15:28:29 2018
accuracy                                 0.8536
steps                                      1876
duration                                     12
Name: 18, dtype: object