# Valohai Notebook Sample

1. **Settings:**
    * **Project:** Valohai project where the executions will be version controlled
    * **Environment:** Environment type for the cloud executions (E.g. AWS p2.xlarge)
    * **Docker Image:** Docker image that provides the required libraries (e.g. TensorFlow). `valohai/pypermill` is the default image, but for this sample use `drazend/mnist-notebook:latest`.

2. **Data:**
    * Upload your data to a cloud storage. You can use app.valohai.com to do this (Go to *Project -> Data -> Upload*)
    * Create a new cell for inputs. For each input create a variable with a link to the file in your cloud storage
    * All your data will be downloaded to `/valohai/inputs/<name-of-input-variable>/`.
        * `training_set_images = "https://valohaidemo.blob.core.windows.net/mnist/train-images-idx3-ubyte.gz"` will get downloaded to `/valohai/inputs/training_set_images/train-images-idx3-ubyte.gz`
    * Tag the cell as "inputs" (In this notebook UI *View->Cell Toolbar->Tags*)
    * Read all your data from `/valohai/inputs/` and save all your data to `/valohai/outputs/`

3. **Parameters:**
    * Create a new cell for parameters.
    * Place all your parameters in this cell.
    * Tag the cell as "parameters" (In this notebook UI *View->Cell Toolbar->Tags*)
    
4. **Tracking key metrics:**
    * Print out all key metrics as JSON (key/value)
        * `print(json.dumps({"step": int(i), "accuracy": float(acc), "loss": float(ce)}))`
    * If you'd like to save more complex plots and graphs, you can save them to `/valohai/outputs/`
        * `plt.savefig("/valohai/outputs/graph.png")`

In [None]:
# Valohai inputs cell (tag this cell as "inputs").
# Each variable holds a link to one file in cloud storage. Per the notes at the
# top of this notebook, a file is downloaded to
# /valohai/inputs/<name-of-input-variable>/, so the variable names below are
# also the sub-directory names the data-loading cell globs.

# MNIST training images and labels.
training_set_images = "https://valohaidemo.blob.core.windows.net/mnist/train-images-idx3-ubyte.gz"
training_set_labels = "https://valohaidemo.blob.core.windows.net/mnist/train-labels-idx1-ubyte.gz"
# MNIST test images and labels.
test_set_images = "https://valohaidemo.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz"
test_set_labels = "https://valohaidemo.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz"

In [None]:
# Parameters
# Tag this cell as "parameters"; keep every tunable value of the run here.
# Step size handed to the Adam optimizer.
learning_rate = 0.001
# Number of training iterations; one minibatch is processed per iteration.
max_steps = 300
# Number of examples requested from the training set per minibatch.
batch_size = 100

In [None]:
%%capture
%matplotlib inline
    
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
import os
import glob
import shutil
import json

# Ask TensorFlow for the name of a GPU device; an empty result is replaced by
# the literal string "None".
device_name = tf.test.gpu_device_name() or "None"

# Request the most restrictive TensorFlow C++ log level.
# NOTE(review): this is set after TensorFlow has already been imported and
# used above - confirm the setting still takes effect this late.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
# Record the TensorFlow version in the notebook output so a run can be traced
# back to the library version that produced it.
print('Using TensorFlow version:', tf.__version__)

In [None]:
from tensorflow.examples.tutorials.mnist import input_data as mnist_data

# The original cell bound this module under both names; the second name is kept
# so that any other cell relying on `input_data` keeps working.
input_data = mnist_data

# Silence TensorFlow's Python-side logging below ERROR while the data is read.
# The previous level is kept in `old_v` so it can be restored later; this cell
# itself does not restore it.
old_v = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)

# Names of the four inputs; each one has its own sub-directory under the
# inputs directory.
input_names = (
    'training_set_images',
    'training_set_labels',
    'test_set_images',
    'test_set_labels',
)

# Without VH_INPUTS_DIR the data is read from (and, if missing, fetched into)
# the local 'data' directory.
inputs_dir = os.getenv('VH_INPUTS_DIR', 'data')
if inputs_dir != 'data':
    # read_data_sets below is given a single directory, so the files from the
    # per-input sub-directories are gathered into one place first.
    data_dir = '/tmp/data'
    # exist_ok: re-running this cell must not fail because the staging
    # directory is still there from the previous run.
    os.makedirs(data_dir, exist_ok=True)
    for input_name in input_names:
        for src_path in glob.glob(os.path.join(inputs_dir, input_name, '*.*')):
            shutil.copy(src_path, data_dir)
    inputs_dir = data_dir

# Images keep their per-image array layout (reshape=False), labels are one-hot
# encoded, and no examples are held back for validation.
mnist = mnist_data.read_data_sets(inputs_dir,
                                  one_hot=True,
                                  reshape=False,
                                  validation_size=0)

train_data = mnist.train.images  # Returns np.array
train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
eval_data = mnist.test.images  # Returns np.array
eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)

In [None]:
# Build the TensorFlow graph of a multi-layer perceptron with two sigmoid
# hidden layers, together with its loss, accuracy and training ops.

# Placeholders for input images and correct labels:
# X is fed batches of 28x28x1 images, Y_ the matching 10-element label rows;
# the leading None leaves the batch size open.
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
Y_ = tf.placeholder(tf.float32, [None, 10])

# Variables for weights of two hidden layers:
# L1 and L2 are the hidden layer widths. Weights start from a truncated normal
# distribution (stddev 0.1), biases start at zero.
L1, L2 = 100, 200
W1 = tf.Variable(tf.truncated_normal([784, L1], stddev=0.1))  # 784 = 28 * 28
B1 = tf.Variable(tf.zeros([L1]))
W2 = tf.Variable(tf.truncated_normal([L1, L2], stddev=0.1))
B2 = tf.Variable(tf.zeros([L2]))
W3 = tf.Variable(tf.truncated_normal([L2, 10], stddev=0.1))
B3 = tf.Variable(tf.zeros([10]))

# The MLP model:
# Each image is flattened to 784 values before the first matrix multiply.
XX = tf.reshape(X, [-1, 784])
Y1 = tf.nn.sigmoid(tf.matmul(XX, W1) + B1)
Y2 = tf.nn.sigmoid(tf.matmul(Y1, W2) + B2)
Ylogits = tf.matmul(Y2, W3) + B3
# The op name 'output' is what the export cell passes as output_node_names.
Y = tf.nn.softmax(Ylogits, name='output')

# Cross-entropy loss function:
# Computed from the raw logits, then averaged over the batch and scaled by 100.
# NOTE(review): the reason for the factor of 100 is not visible here -
# presumably it only makes the reported loss easier to read; confirm.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy)*100

# Prediction accuracy:
# Fraction of the batch whose highest-scoring class matches the label.
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Learning rate and the used optimizer:
# Running train_step performs one Adam update that minimizes cross_entropy.
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

In [None]:
# Open the session used by the training and export cells, then run the op that
# initializes all variables defined in the graph.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [None]:
# Per-step history of accuracy and cross-entropy; the plotting cell reads both.
acc_v = []
ce_v = []

for i in range(max_steps):
    # Fetch the next minibatch and run a single optimizer step on it, reading
    # back the accuracy and loss measured on that same minibatch.
    batch_images, batch_labels = mnist.train.next_batch(batch_size)
    acc, ce, _ = sess.run(
        [accuracy, cross_entropy, train_step],
        feed_dict={X: batch_images, Y_: batch_labels},
    )
    acc_v.append(acc)
    ce_v.append(ce)

    # One JSON object per line is the metric format described in the notes at
    # the top of this notebook.
    metrics = {"step": int(i), "accuracy": float(acc), "loss": float(ce)}
    print(json.dumps(metrics))

    # NOTE(review): short pause after each step - presumably to pace the
    # metric output; confirm before removing.
    time.sleep(0.03)

In [None]:
# Draw one small figure per recorded metric, plotted against the step index:
# first the loss, then the accuracy.
for series, title in ((ce_v, 'cross-entropy loss'), (acc_v, 'accuracy')):
    plt.figure(figsize=(5, 3))
    plt.plot(range(max_steps), series)
    plt.title(title)

In [None]:
# Export the trained model as a frozen graph named model.pb.
# The file goes to VH_OUTPUTS_DIR when that variable is set, otherwise to a
# local ./outputs directory.
outputs_dir = os.getenv('VH_OUTPUTS_DIR', './outputs')
if outputs_dir == './outputs' and os.path.exists(outputs_dir):
    # Local run: discard the results of any previous run first.
    shutil.rmtree(outputs_dir)
# Create the target in both cases so the write below cannot fail on a missing
# directory; an already existing directory is left untouched.
os.makedirs(outputs_dir, exist_ok=True)
pbpath = os.path.join(outputs_dir, 'model.pb')

# Replace the variables by constants holding their current values in the
# session, keeping what is needed to compute the 'output' (softmax) node.
output_graph_def = tf.graph_util.convert_variables_to_constants(
    sess=sess,
    input_graph_def=sess.graph.as_graph_def(),
    output_node_names=['output'],
)

# tf.gfile.GFile replaces the deprecated tf.gfile.FastGFile.
with tf.gfile.GFile(pbpath, 'wb') as f:
    f.write(output_graph_def.SerializeToString())
# Report success only after the file has been closed.
print("Saved %s" % pbpath)