Copyright (c) Microsoft Corporation. All rights reserved.

Licensed under the MIT License.

# Using different computes for different nodes of the pipeline
This example uses a GPU Batch AI cluster and a CPU Batch AI cluster to do training (GPU) and prediction (CPU) with a pre-trained MobileNet on the flowers dataset. It consists of the following steps:
1. Retrain a MobileNet that was pretrained on ImageNet, using the GPU cluster. (GPU Batch AI)
2. Predict on a held-out set of images using the CPU cluster (CPU Batch AI). This held-out set has been put in a public blob container.

In [None]:
import os
from azureml.core import Workspace, Run, Experiment

# Load the workspace via Workspace.from_config() and echo its identity.
ws = Workspace.from_config()
print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

# Local folder that collects the scripts run on the remote computes; it is
# passed as source_directory to the pipeline steps further down.
project_folder = "scripts"
run_history_name = project_folder

# exist_ok=True replaces the check-then-create pair and is safe to re-run.
os.makedirs(project_folder, exist_ok=True)

In [None]:
from azureml.core.compute import BatchAiCompute, ComputeTarget
from azureml.core.datastore import Datastore
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep

In [None]:
# A bare `except:` would also swallow KeyboardInterrupt and unrelated failures
# and then try to create a cluster anyway, so only the "target not found" case
# is caught.
# NOTE(review): assumes a missing target raises ComputeTargetException —
# confirm against the installed SDK version.
from azureml.core.compute_target import ComputeTargetException

# GPU cluster: used by the training step.
gpu_cluster_name = "gpu-cluster"
try:
    gpu_cluster = BatchAiCompute(ws, gpu_cluster_name)
    print("found existing cluster.")
except ComputeTargetException:
    print("creating new cluster")
    provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = "STANDARD_NC6",
                                                                    autoscale_enabled = True,
                                                                    cluster_min_nodes = 1, 
                                                                    cluster_max_nodes = 1)

    # create the cluster
    gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, provisioning_config)
    gpu_cluster.wait_for_completion(show_output=True)

# CPU cluster: used by the prediction step.
cpu_cluster_name = "cpu-cluster"
try:
    cpu_cluster = BatchAiCompute(ws, cpu_cluster_name)
    print("found existing cluster.")
except ComputeTargetException:
    print("creating new cluster")
    provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = "STANDARD_D2_v2",
                                                                    autoscale_enabled = True,
                                                                    cluster_min_nodes = 3, 
                                                                    cluster_max_nodes = 3)

    # create the cluster
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, provisioning_config)
    cpu_cluster.wait_for_completion(show_output=True)

# Python scripts
We define several scripts that will be run on remote computes:
- `retrain.py` and `label_image.py` from `tensorflow` repo: Contain code for training from a pre-trained model. The scripts take care of downloading the pretrained model using `tensorflow-hub`, resizing to the input size required by the model and applying transformations to the images.
- `predict_dir.py`: Run evaluation on a directory of images in a blob container.

# Training code

We use transfer learning code from tensorflow repo with a minor modification to take an output directory where the graph and labels are written to. The file called `retrain.py` is included in this directory with the original [here](https://raw.githubusercontent.com/tensorflow/hub/r0.1/examples/image_retraining/retrain.py)  

In [None]:
import shutil
# Stage the training and labelling scripts in the project folder, which is the
# source_directory handed to both pipeline steps.
shutil.copy("retrain.py", project_folder)
shutil.copy("label_image.py", project_folder)

# Prediction code

We use the following file for prediction. It uses `label_image.py` from tensorflow's repo. The original file is [here](https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/examples/label_image/label_image.py)

In [None]:
%%writefile $project_folder/predict_dir.py
import argparse
import numpy as np
import os
import shutil
import tensorflow as tf
import label_image

def predict_dir(args):
    """Score every file in ``args.dir`` and record the best labels per file.

    Args:
        args: Parsed command-line namespace providing ``train_dir`` (folder
            holding ``output_graph.pb`` and ``output_labels.txt``), ``dir``
            (folder of images to score) and ``output_dir`` (existing folder
            that receives ``results.txt``).

    For each file, the file name and up to five ``label, score`` lines
    (highest score first) are written to ``results.txt``; the file is then
    copied into ``./outputs/``.
    """
    print("Inside predict")
    graph_file = os.path.join(args.train_dir, "output_graph.pb")
    output_labels_file = os.path.join(args.train_dir, "output_labels.txt")

    # The graph, its input/output operations and the label list are the same
    # for every image, so resolve them once instead of once per file.
    graph = label_image.load_graph(graph_file)
    labels = label_image.load_labels(output_labels_file)
    input_layer = "Placeholder"
    output_layer = "final_result"
    input_operation = graph.get_operation_by_name("import/" + input_layer)
    output_operation = graph.get_operation_by_name("import/" + output_layer)

    results_file = os.path.join(args.output_dir, "results.txt")
    # One session is reused for all images; opening a new one per file only
    # adds start-up cost.
    with open(results_file, "w") as fp, tf.Session(graph=graph) as sess:
        for filename in os.listdir(args.dir):
            full_path = os.path.join(args.dir, filename)
            t = label_image.read_tensor_from_image_file(
                full_path,
                input_height=224,
                input_width=224,
                input_mean=128,
                input_std=128)

            results = sess.run(output_operation.outputs[0], {
                input_operation.outputs[0]: t
            })
            results = np.squeeze(results)

            # Indices of the five largest scores, best first.
            top_k = results.argsort()[-5:][::-1]
            fp.write("Filename: {}\n".format(filename))
            for i in top_k:
                fp.write("{}, {}\n".format(labels[i], results[i]))

    # Make sure the destination folder exists before copying into it.
    os.makedirs("./outputs", exist_ok=True)
    shutil.copy(results_file, "./outputs/")

# Command-line entry point. All three folders are needed by predict_dir, so
# argparse rejects a missing one with a usage message instead of letting the
# script fail later on a None path.
parser = argparse.ArgumentParser()
parser.add_argument("--dir", required=True, help="directory to be processed")
parser.add_argument("--train_dir", required=True, help="directory containing graph and labels to be executed")
parser.add_argument("--output_dir", required=True, help="output directory")

args = parser.parse_args()
# predict_dir opens results.txt inside output_dir, so the folder must exist first.
os.makedirs(args.output_dir, exist_ok=True)
predict_dir(args)

# Specify the python packages to install
The TensorFlow version we use depends on a specific version of cuBLAS. Therefore, for GPU, we use a prebuilt image that already has those dependencies. We also use tensorflow-hub, which makes retrieving prebuilt models easy.

We define two runconfig environments, one for each of the compute clusters.

In [None]:
from azureml.core.runconfig import CondaDependencies, RunConfiguration
# CPU environment: CPU build of TensorFlow, Docker enabled, no base image set.
cd = CondaDependencies.create(
    pip_packages=["tensorflow==1.10", "azureml-defaults"],
)
cpu_runconfig = RunConfiguration(conda_dependencies=cd)
cpu_runconfig.environment.docker.enabled = True

# GPU environment: GPU build of TensorFlow plus tensorflow-hub, running in a
# prebuilt GPU base image with Docker GPU support switched on.
cd = CondaDependencies.create(
    pip_packages=["tensorflow-gpu==1.10", "tensorflow-hub", "azureml-defaults"],
)
gpu_runconfig = RunConfiguration(conda_dependencies=cd)
gpu_docker = gpu_runconfig.environment.docker
gpu_docker.enabled = True
gpu_docker.gpu_support = True
gpu_docker.base_image = "himanshuaml/tensorflow_conda:latest-gpu-py3"

# Define blob container that will store the outputs
We use the default blob datastore that comes with the workspace. 

In [None]:
# Workspace default datastore; backs the train_dir / output_dir PipelineData below.
default_datastore = ws.get_default_datastore()

# Evaluation
This is a public container shared by us that contains a few images downloaded from the web.

In [None]:
# Register the public sample-image container as a datastore named "sampledata".
# No key or SAS token is passed; overwrite=True is set on the registration call.
sample_images_account_name = "pipelinedata"
sample_images = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name="sampledata",
    container_name="sampledata",
    account_name=sample_images_account_name,
    overwrite=True,
)

# Define inputs to the steps

In [None]:
# Training images: the "flower_photos" folder of the sample datastore.
dataset_dir = DataReference(
    datastore=sample_images,
    data_reference_name="dataset_dir",
    path_on_datastore="flower_photos",
    mode="download",
)

# Intermediate and final outputs, both on the default datastore.
train_dir = PipelineData("train_dir", datastore=default_datastore)
output_dir = PipelineData("output_dir", datastore=default_datastore)

# Images to score: the "flowers" folder of the sample datastore.
pred_dir = DataReference(
    datastore=sample_images,
    data_reference_name="sampledata",
    path_on_datastore="flowers",
    mode="download",
)

In [None]:
# Step 1: retrain on the GPU cluster, reading dataset_dir and writing train_dir.
train_arguments = [
    "--image_dir", dataset_dir,
    "--tfhub_module", "https://tfhub.dev/google/imagenet/mobilenet_v2_130_224/feature_vector/2",
    "--train_dir", train_dir,
    "--how_many_training_steps", 50,
]
train_step = PythonScriptStep(
    name="train",
    script_name="retrain.py",
    source_directory=project_folder,
    arguments=train_arguments,
    inputs=[dataset_dir],
    outputs=[train_dir],
    target=gpu_cluster,
    runconfig=gpu_runconfig,
)

# Step 2: score pred_dir on the CPU cluster using the graph from train_dir,
# writing into output_dir.
test_arguments = [
    "--train_dir", train_dir,
    "--dir", pred_dir,
    "--output_dir", output_dir,
]
test_step = PythonScriptStep(
    name="evaluation",
    script_name="predict_dir.py",
    source_directory=project_folder,
    arguments=test_arguments,
    inputs=[train_dir, pred_dir],
    outputs=[output_dir],
    target=cpu_cluster,
    runconfig=cpu_runconfig,
)

In [None]:
# Only test_step is listed explicitly.
# NOTE(review): presumably train_step is pulled in automatically because
# test_step consumes its train_dir output — confirm against the SDK docs.
pipeline = Pipeline(workspace=ws, steps=[test_step])

In [None]:
# Validate the pipeline definition first, then submit it under the
# "train_test_images" experiment.
pipeline.validate()
experiment = Experiment(ws, "train_test_images")
pipeline_run = experiment.submit(pipeline)

# Monitor using widget

In [None]:
from azureml.train.widgets import RunDetails
# Display the notebook widget for the submitted pipeline run.
RunDetails(pipeline_run).show()

# Show some results

In [None]:
# Wait for the pipeline run before trying to download its results below.
pipeline_run.wait_for_completion()

In [None]:
# NOTE(review): takes the first child run and assumes it is the "evaluation"
# step that produced results.txt — confirm the order returned by get_children().
node_run = list(pipeline_run.get_children())[0]
node_run.download_file("./outputs/results.txt")

# Show first few lines of file
with open("results.txt") as fp:
    lines = fp.readlines()

# Each line already ends with "\n"; joining on "\n" again printed a blank line
# after every entry, so concatenate the lines as they are.
print("".join(lines[:10]).rstrip("\n"))

# Optionally clean compute resources

In [None]:
#gpu_cluster.delete()
#cpu_cluster.delete()