In [None]:
import argparse
import copy
import os
import shutil
import sys
import time

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

import dataset
from models.conv2_dense2_dropout import Model
#from models.dense3 import Model

from helpers.os_utils import os_info
from helpers.history import ExpHistory
from helpers.estimator_utils import create_model_fn, split_datasource

In [None]:
# This cell may be re-run: close the session left over from a previous run
# (if there is one) before opening a fresh interactive session.
if 'sess' in globals():
    sess.close()
else:
    # First execution in this kernel: there is no session to close yet.
    print("Don't worry. Need to ignore this error once")
sess = tf.InteractiveSession()

### Get the history and the runtime context 

In [None]:
# Show INFO-level TensorFlow log messages.
tf.logging.set_verbosity(tf.logging.INFO)

# The experiment history object is created from this CSV file name.
HIST_FILE_NAME = 'experiment_history.csv'
history = ExpHistory(HIST_FILE_NAME)

# Greet the user with the current local time and the TensorFlow version.
# time.asctime() without arguments formats the current local time.
localtime = time.asctime()
user = os.environ.get('USER', os.environ.get('USERNAME', 'anonymous'))
print("\n\n")
print("Welcome, %s, it's %s, and you'll be working with Tensorflow version %s" % (user, localtime, tf.__version__))

# Describe the platform this notebook is running on.
rt = os_info()
this_os, this_node, this_machine, this_cuda = (
    rt[key] for key in ('os', 'node', 'machine', 'cuda')
)
print("Your current runtime: \n  node: %s, \n  os: %s, \n  machine: %s, \n  cuda: %s" % (this_node, this_os, this_machine, this_cuda))
print("\n")

# Columns of the history table to display; the commented-out entries are
# left out of the view on purpose.
columns = [
    'node',
    #'os',
    #'machine',
    'cuda',
    'multi_gpu',
    'model',
    'batch_size',
    'data_dir',
    #'model_dir',
    'train_epochs',
    #'user',
    #'time_stamp',
    'localtime',
    'steps',
    'accuracy',
    'duration',
]

# Display the selected columns of the 10 most recent experiment records.
history.experiments.tail(10)[columns]

### Want to start with the most recent record from this platform?

In [None]:
# Let the history propose a hyper-parameter record to start from
# (per the heading above, presumably the most recent record from this platform).
hparams=history.suggest_from_history()
# Alternative: start from a specific history record instead.
#hparams=history.copy_from_record(18)
# Display the record that will be used.
hparams

### Use as new hyper-parameter record, with adaptations 

In [None]:
# Choose the data set: either the commented-out 'DIGITS' pair or the active
# 'FASHION' pair. DATA_SET and hparams.data_dir must be switched together.
#DATA_SET = 'DIGITS'
#hparams.data_dir = '/var/ellie/data/mnist'

DATA_SET = 'FASHION'
hparams.data_dir = '/var/ellie/data/mnist_fashion'

# Overrides for this run.
hparams.train_epochs = 2
hparams.batch_size = 256
hparams.multi_gpu = False
# Record which model implementation (the imported Model class) is used.
hparams.model = Model.id
# Display the adapted record.
hparams

### Always have a quick peek at your input data!

In [None]:
# Fetch one batch of 10 training examples and evaluate it in the interactive session.
peek_iterator = dataset.training_dataset(hparams.data_dir, DATA_SET).batch(10).make_one_shot_iterator()
samples = sess.run(peek_iterator.get_next())

# Show the 10 examples on a 2 x 5 grid of axes. Iterating over arr.flat visits
# the axes row by row, so position i on the grid shows example i.
f, arr = plt.subplots(2,5)
for i, axis in enumerate(arr.flat):
    # Each example is reshaped to a 28 x 28 image for display.
    img = samples[0][i].reshape([28,28])
    axis.imshow(img)

# Second component of the batch for the same 10 examples (presumably the labels).
samples[1][:10]

# Get to work!

In [None]:
# For the sake of this tutorial, we always start from scratch.
# Remove the directory the Estimator is configured with (hparams.model_dir),
# not a hard-coded path: otherwise checkpoints left in a different model_dir
# would be resumed silently and the reported step count would be wrong.
# The guard skips the removal when no model_dir is set; ignore_errors makes a
# missing directory a no-op, like `rm -rf`.
if hparams.model_dir:
    shutil.rmtree(hparams.model_dir, ignore_errors=True)

### The model function constructs the computational graphs for training, eval and test
Note that the actual construction takes place within the Estimator. Thus, none of the graph-constructing code should be called explicitly from the API client: the Estimator will complain that any parts constructed before the ones it constructs itself don't belong to the same graph.

In [None]:
def _build_model(params):
    """Create a fresh ``Model`` instance from the given params."""
    return Model(params)


# Combine the model factory, the optimizer, the loss function and the
# hyper-parameter record into the model function handed to the Estimator.
model_function = create_model_fn(
    _build_model,
    tf.train.AdamOptimizer(),
    tf.losses.sparse_softmax_cross_entropy,
    hparams,
)

Performance depends on the data format, and differs between CPU and GPU computations

In [None]:
# Pick the image data format: 'channels_first' when this TensorFlow build has
# CUDA support, 'channels_last' otherwise.
if tf.test.is_built_with_cuda():
    data_format = 'channels_first'
else:
    data_format = 'channels_last'

### The Estimator is the centerpiece of TensorFlow's new API

In [None]:
# Extra parameters passed to the Estimator alongside the model function.
estimator_params = {
    'data_format': data_format,
    'multi_gpu': hparams.multi_gpu,
}

# The Estimator keeps its checkpoints and summaries in hparams.model_dir.
mnist_classifier = tf.estimator.Estimator(
    model_fn=model_function,
    model_dir=hparams.model_dir,
    params=estimator_params,
)

##### ```input_fn``` functions are factories for ```Dataset```s

### Split the training dataset into training and evaluation sets

In [None]:
def train_input_fn():
    """Build the input pipeline used for training.

    Loads the training dataset selected by ``DATA_SET`` from
    ``hparams.data_dir`` and keeps the first part returned by
    ``split_datasource(..., 60000, 0.95)``. That part is cached, shuffled,
    repeated ``hparams.train_epochs`` times and batched by
    ``hparams.batch_size``.

    Returns:
        The batched training dataset.
    """
    full_ds = dataset.training_dataset(hparams.data_dir, DATA_SET)
    train_part, _unused = split_datasource(full_ds, 60000, 0.95)

    pipeline = train_part.cache()
    # 57000 = 0.95 * 60000; presumably the size of the training part, so the
    # shuffle buffer covers all of it. Confirm against split_datasource.
    pipeline = pipeline.shuffle(buffer_size=57000)
    pipeline = pipeline.repeat(hparams.train_epochs)
    return pipeline.batch(hparams.batch_size)

def eval_input_fn():
    """Build the input pipeline used for evaluation.

    Loads the same training dataset as ``train_input_fn`` and keeps the second
    part returned by ``split_datasource(..., 60000, 0.95)``, batched by
    ``hparams.batch_size``. There is no shuffling or repetition.

    Returns:
        The batched evaluation dataset.
    """
    full_ds = dataset.training_dataset(hparams.data_dir, DATA_SET)
    _unused, eval_part = split_datasource(full_ds, 60000, 0.95)
    return eval_part.batch(hparams.batch_size)

### Logging hooks

In [None]:
# Maps the label shown in the log to the name of the tensor to log.
tensors_to_log = {
    'train_accuracy': 'train_accuracy',
}

# Log the tensors above once every 1000 iterations.
logging_hook = tf.train.LoggingTensorHook(
    tensors=tensors_to_log,
    every_n_iter=1000,
)

### Run the training and report the new hyper-parameters 

In [None]:
# Train, measuring the wall-clock time of the training call only.
start_time = time.time()
mnist_classifier.train(input_fn=train_input_fn, hooks=[logging_hook])
duration = time.time() - start_time

# Evaluate on the evaluation input and copy the results into the
# hyper-parameter record.
eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
hparams.accuracy, hparams.steps = (
    eval_results['accuracy'],
    eval_results['global_step'],
)
# The duration is recorded in whole seconds.
hparams.duration = int(duration)

# Report the finished experiment to the history.
history.report_experiment(hparams)

print('Evaluation results:\n\t%s' % eval_results)

# Display the record that was reported.
hparams