## Start the spark configureation

## Cluster Configuration

In [1]:
sc

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log,Current session?
2,application_1500400165929_0025,pyspark3,idle,Link,Link,✔


SparkSession available as 'spark'.
<pyspark.context.SparkContext object at 0x7fa301e98940>

In [3]:
%%configure -f 
{"name": "spark-tensorflow",
"driverMemory": "1G",
"executorMemory": "4G",
"executorCores": 4
}

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log,Current session?
3,application_1500400165929_0026,pyspark3,idle,Link,Link,✔


SparkSession available as 'spark'.


ID,YARN Application ID,Kind,State,Spark UI,Driver log,Current session?
3,application_1500400165929_0026,pyspark3,idle,Link,Link,✔


## Specify the parameters

In [7]:
import itertools

# A simple example
epochs = [20]
learning_rate = [0.1, 0.0001]
# Build a list of list of hyperparameter values
sample_hyperparams = [learning_rate, epochs]
sample_combinations = [item for item in itertools.product(*sample_hyperparams)]
print('Simple combination for a standard CNN skeleton for Image Classification ...')
print(sample_combinations)


# some parameters for playing with neural style painting
iterations = [500]
learning_rate = [1e1, 0.1]
style_wt = [5e1, 5e2]
content_wt_blend = [1, 0.5]
style_blend_weights = [0.4, 0.5, 0.8]
preserve_colors = [True, False]
# Build a list of list of hyperparameter values
hyperparams = [iterations, learning_rate, style_wt]
combinations = [item for item in itertools.product(*hyperparams)]
print('Some combinations for Neural Style Painting ...')
print(combinations)

Simple combination for a standard CNN skeleton for Image Classification ...
[(0.1, 20), (0.0001, 20)]
Some combinations for Neural Style Painting ...
[(500, 10.0, 50.0), (500, 10.0, 500.0), (500, 0.1, 50.0), (500, 0.1, 500.0)]

## Prepare the tensor flow program for execution
1. Build your Tensor based Estimator(Classifier) / or other deep learning model 
2. Copy over the program script to an accessible location on worker nooes
3. Architecture for the distributed setup for tensorflow
<img src="https://preview.ibb.co/kfKmKk/architecture_spark_tensorflow.png" alt="architecture_spark_tensorflow" border="0">
4. The above architecture will help us tune the model for the right parameters in a distributed manner

In [12]:
# 1. Setting up the command for a simple CNN example
sample_cmd = 'python "/mnt/cnn/cnn_tutorial.py" "--learning_rate" {} "--epoch" {}'
rdd_list = sc.parallelize(sample_combinations)
sample_cmd_rdd = rdd_list.map(lambda x: sample_cmd.format(x[0], x[1]))
sample_cmd_rdd.collect()


# 2. Setting up the command for an example code for Neural Style Painting setup
# Reference: Great implementation by Anish Athalye of the Gatys et al. paper on 
# Style Transfer(https://arxiv.org/pdf/1508.06576v2.pdf)
# https://github.com/datascienceinc/neural-style
cmd = 'python "/mnt/neural_style_painting/neural-style/neural_style.py" "--content" "/mnt/neural_style_painting/neural-style/examples/1-content.jpg" "--styles" "/mnt/neural_style_painting/neural-style/examples/1-style.jpg" "--output" "/tmp/results/f_d_{}.jpg" "--iterations" {} "--network" "/mnt/neural_style_painting/vgg_network/imagenet-vgg-verydeep-19.mat" "--learning-rate" {} "--style-weight" {}'
rdd_list = sc.parallelize(combinations)
cmd_rdd = rdd_list.map(lambda x: cmd.format(x[1], x[0], x[1], x[2]))
cmd_rdd.collect()

['python "/mnt/neural_style_painting/neural-style/neural_style.py" "--content" "/mnt/neural_style_painting/neural-style/examples/1-content.jpg" "--styles" "/mnt/neural_style_painting/neural-style/examples/1-style.jpg" "--output" "/tmp/results/f_d_10.0.jpg" "--iterations" 500 "--network" "/mnt/neural_style_painting/vgg_network/imagenet-vgg-verydeep-19.mat" "--learning-rate" 10.0 "--style-weight" 50.0', 'python "/mnt/neural_style_painting/neural-style/neural_style.py" "--content" "/mnt/neural_style_painting/neural-style/examples/1-content.jpg" "--styles" "/mnt/neural_style_painting/neural-style/examples/1-style.jpg" "--output" "/tmp/results/f_d_10.0.jpg" "--iterations" 500 "--network" "/mnt/neural_style_painting/vgg_network/imagenet-vgg-verydeep-19.mat" "--learning-rate" 10.0 "--style-weight" 500.0', 'python "/mnt/neural_style_painting/neural-style/neural_style.py" "--content" "/mnt/neural_style_painting/neural-style/examples/1-content.jpg" "--styles" "/mnt/neural_style_painting/neural-s

## Parallelize model parameter tunning

In [None]:
import subprocess
import shlex
from __future__ import print_function

# A simple function to extract model metric for evaluation
def get_metric(input_str):
    pattern = re.compile(r"Accuracy\:.*")
    return pattern.search(input_str).group(0)

# A simple function to do parallel computation
def launch_job(cmd_str):
    call_params = shlex.split(cmd_str)
    proc = subprocess.Popen(call_params, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
    out, err = proc.communicate()
    return out

results = cmd_rdd.map(lambda k: get_metric(launch_job(k)))
results.collect()

## Ease of accessing "sparkmagic + Livy" kernel setup
<img src="https://preview.ibb.co/h7S95Q/platform_sparkmagic_livy.png" alt="platform_sparkmagic_livy">