In [4]:
def wrapper(learning_rate, dropout):
    """Train and evaluate a small MNIST CNN for one hyperparameter combination.

    The function is self-contained (it performs its own imports) and is passed
    as a whole to `tflauncher.launch` in a later cell of this notebook.

    Args:
        learning_rate: Initial learning rate fed to the exponential-decay
            schedule that drives the Adam optimizer.
        dropout: Forwarded to `tf.layers.dropout` as `rate`, i.e. the fraction
            of units dropped while training.
    """
    import tensorflow as tf
    from hops import tensorboard
    from hops import hdfs

    # Training parameters
    num_steps = 200
    batch_size = 128

    # Network parameters
    num_input = 784  # MNIST data input (img shape: 28*28)
    num_classes = 10  # MNIST total classes (0-9 digits)

    train_filenames = [hdfs.project_path() + "mnist/train.tfrecords"]
    validation_filenames = [hdfs.project_path() + "mnist/validation.tfrecords"]

    def conv_net(x_dict, n_classes, dropout, reuse, is_training):
        """Build the convolutional network and return the unscaled logits.

        Args:
            x_dict: Input features. Despite the name it is used directly as a
                tensor and reshaped to [-1, 28, 28, 1].
            n_classes: Number of units in the output layer.
            dropout: Dropout rate applied after the fully connected layer.
            reuse: Forwarded to `tf.variable_scope('ConvNet', ...)` so that a
                second call can share the variables created by the first.
            is_training: Forwarded to `tf.layers.dropout` as `training`;
                dropout is only applied when this is True.

        Returns:
            The output of the final dense layer (no softmax applied).
        """
        # Define a scope for reusing the variables
        with tf.variable_scope('ConvNet', reuse=reuse):
            # Each example arrives as a flat vector of 784 pixels; reshape to
            # the 4-D picture format [Batch Size, Height, Width, Channel].
            x = tf.reshape(x_dict, shape=[-1, 28, 28, 1])

            # Convolution layer with 32 filters and a kernel size of 5
            conv1 = tf.layers.conv2d(
                x, filters=32, kernel_size=5, activation=tf.nn.relu)
            # Max pooling (down-sampling) with pool size 2 and strides of 2
            conv1 = tf.layers.max_pooling2d(conv1, pool_size=2, strides=2)

            # Convolution layer with 64 filters and a kernel size of 3
            conv2 = tf.layers.conv2d(
                conv1, filters=64, kernel_size=3, activation=tf.nn.relu)
            # Max pooling (down-sampling) with pool size 2 and strides of 2
            conv2 = tf.layers.max_pooling2d(conv2, pool_size=2, strides=2)

            # Flatten the feature maps to one vector per example
            fc1 = tf.contrib.layers.flatten(conv2)

            # Fully connected layer (no activation is passed to it)
            fc1 = tf.layers.dense(fc1, units=1024)
            # Apply dropout (if is_training is False, dropout is not applied)
            fc1 = tf.layers.dropout(fc1, rate=dropout, training=is_training)

            # Output layer, class prediction
            out = tf.layers.dense(fc1, units=n_classes)

        return out

    def model_fn(features, labels, mode, params):
        """Model function following the TF Estimator template.

        Args:
            features: Batch of flattened images produced by the input function.
            labels: Batch of integer class labels (not used in PREDICT mode).
            mode: One of the `tf.estimator.ModeKeys` values.
            params: Hyperparameters given to the Estimator. Accepted because
                the Estimator below is constructed with `params=hparams`; the
                values actually used are the `learning_rate` and `dropout`
                arguments of the enclosing `wrapper`.

        Returns:
            A `tf.estimator.EstimatorSpec` for the requested mode.
        """
        # Because dropout behaves differently at training and prediction time,
        # build two computation graphs that share the same weights.
        logits_train = conv_net(
            features, num_classes, dropout, reuse=False, is_training=True)
        # Single-argument print() emits the same text on Python 2 and 3.
        print(logits_train)
        logits_test = conv_net(
            features, num_classes, dropout, reuse=True, is_training=False)

        # Predictions
        pred_classes = tf.argmax(logits_test, axis=1)
        # Class probabilities; currently not part of the returned spec.
        pred_probas = tf.nn.softmax(logits_test)

        # If prediction mode, early return
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)

        # Define loss and optimizer
        loss_op = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits_train, labels=tf.cast(labels, dtype=tf.int32)))
        # NOTE(review): decay_steps (100000) is far larger than num_steps
        # (200), so the rate stays almost constant over a run - confirm that
        # this is intended.
        lr = tf.train.exponential_decay(
            learning_rate,
            tf.train.get_global_step(),
            decay_steps=100000,
            decay_rate=0.96)
        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        train_op = optimizer.minimize(
            loss_op, global_step=tf.train.get_global_step())

        # Evaluate the accuracy of the model
        acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)

        # Log the first (up to) 10 images of the batch as an image summary
        image = tf.reshape(features[:10], [-1, 28, 28, 1])
        tf.summary.image("image", image)

        # TF Estimators require an EstimatorSpec that specifies the different
        # ops for training, evaluating, ...
        estim_specs = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=pred_classes,
            loss=loss_op,
            train_op=train_op,
            eval_metric_ops={'accuracy': acc_op})

        return estim_specs

    def data_input_fn(filenames, batch_size=128, shuffle=False, repeat=None):
        """Create an Estimator input function that reads MNIST TFRecords.

        Args:
            filenames: TFRecord file name(s) handed to `TFRecordDataset`.
            batch_size: Maximum number of examples per batch.
            shuffle: If true, shuffle parsed examples with a buffer of 128.
            repeat: Forwarded to `dataset.repeat`.

        Returns:
            A zero-argument function returning a `(features, labels)` pair of
            tensors.
        """

        def parser(serialized_example):
            """Parses a single tf.Example into image and label tensors."""
            features = tf.parse_single_example(
                serialized_example,
                features={
                    'image_raw': tf.FixedLenFeature([], tf.string),
                    'label': tf.FixedLenFeature([], tf.int64),
                })
            image = tf.decode_raw(features['image_raw'], tf.uint8)
            image.set_shape([num_input])

            # Normalize the image values from the range [0, 255] to [-0.5, 0.5]
            image = tf.cast(image, tf.float32) / 255 - 0.5
            label = tf.cast(features['label'], tf.int32)
            return image, label

        def _input_fn():
            """Build the dataset pipeline and return one batch of tensors."""
            # Import MNIST data
            dataset = tf.contrib.data.TFRecordDataset(filenames)

            # Map the parser over the dataset, then batch the results by up to
            # batch_size examples
            dataset = dataset.map(
                parser, num_threads=1, output_buffer_size=batch_size)
            if shuffle:
                dataset = dataset.shuffle(buffer_size=128)
            dataset = dataset.batch(batch_size)
            dataset = dataset.repeat(repeat)
            iterator = dataset.make_one_shot_iterator()

            features, labels = iterator.get_next()

            return features, labels

        return _input_fn

    run_config = tf.contrib.learn.RunConfig(
        model_dir=tensorboard.logdir(),
        save_checkpoints_steps=10,
        save_summary_steps=5,
        log_step_count_steps=10)

    hparams = tf.contrib.training.HParams(
        learning_rate=learning_rate, dropout_rate=dropout)

    # NOTE(review): tf.summary.merge_all() is evaluated here, before model_fn
    # has added any summary to a graph, and the hook receives neither an
    # output_dir nor a summary_writer - confirm that it actually writes
    # summaries during evaluation.
    summary_hook = tf.train.SummarySaverHook(
        save_steps=run_config.save_summary_steps,
        scaffold=tf.train.Scaffold(),
        summary_op=tf.summary.merge_all())

    mnist_estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
        params=hparams
    )

    train_input_fn = data_input_fn(train_filenames[0], batch_size=batch_size)
    eval_input_fn = data_input_fn(
        validation_filenames[0], batch_size=batch_size)

    experiment = tf.contrib.learn.Experiment(
        mnist_estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=num_steps,
        min_eval_frequency=5,
        eval_hooks=[summary_hook]
    )

    experiment.train_and_evaluate()


In [5]:
from hops import util

# Hyperparameter search space: the keys match the argument names of `wrapper`,
# and each value lists the candidate settings for that argument.
# NOTE(review): `wrapper` forwards `dropout` to tf.layers.dropout as `rate`
# (the fraction of units dropped), so 0.7 drops 70% of the units - confirm
# that this is the intended value.
args_dict = {
    'learning_rate': [0.0005],
    'dropout': [0.7],
}

# Expand the search space into the grid that is handed to the launcher
args_dict_grid = util.grid_params(args_dict)

print(args_dict_grid)

{'learning_rate': [0.0005], 'dropout': [0.7]}

In [None]:
from hops import tflauncher
import timeit
 
# Time the whole launch; the value returned by `launch` is kept as
# `tensorboard_hdfs_logdir` for the TensorBoard cell further down.
start_time = timeit.default_timer()
tensorboard_hdfs_logdir = tflauncher.launch(spark, wrapper, args_dict_grid)
elapsed = timeit.default_timer() - start_time
# Single-argument print() emits the same text on Python 2 and Python 3.
print("Elapsed time: " + str(elapsed))

Finished TensorFlow job 

Make sure to check /Logs/TensorFlow/application_1511276242554_0465/runId.0 for logfile and TensorBoard logdir
Elapsed time: 132.429859877

In [None]:
from hops import tensorboard

# Visualize all TensorBoard events for the launched jobs in the same TensorBoard.
# `tensorboard_hdfs_logdir` is the value returned by `tflauncher.launch` in the
# launch cell, so that cell must have been run first.
tensorboard.visualize(spark, tensorboard_hdfs_logdir)