## Publish pipeline "pipeline_transform" to Azure

### Imports

In [None]:
# Core Azure ML SDK entry points used by the cells below.
# NOTE(review): RunDetails (and PipelineData further down) are imported but not
# referenced in the cells visible in this notebook.
import azureml.core
from azureml.core import Workspace, Datastore
from azureml.widgets import RunDetails

# check core SDK version number
print("Azure ML SDK Version: ", azureml.core.VERSION)

# Pipeline-specific classes: data references, pipeline objects, steps and parameters.
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core.graph import PipelineParameter
print("Pipeline SDK-specific imports completed")


## Parameters

In [None]:
# Local script that the pipeline step runs (copied into the upload folder later).
python_pipeline_script = "pipeline_step_transform_data.py"

# Name and description under which the pipeline is published.
published_pipeline_name = "Test_transform_Pipeline_rm2"
published_pipeline_description = "Published test transform Pipeline"

### Load workspace, datastore

In [None]:
# Load the workspace configuration from the 'azure_config_dev.json' file.
ws = Workspace.from_config(path='azure_config_dev.json')
# Print the identifying details of the workspace. The original printed
# ws.location twice; the duplicate is replaced by the subscription id so that
# every field shown is distinct.
print(ws.name, ws.location, ws.resource_group, ws.subscription_id, sep='\t')

# Blob storage associated with the workspace.
# The following call GETS the Azure Blob Store associated with your workspace.
# Note that "workspaceblobstore" is the name of this store; it CANNOT BE CHANGED
# and must be used as is.
def_blob_store = Datastore(ws, "workspaceblobstore")
print("Blobstore's name: {}".format(def_blob_store.name))

### Data reference to blob data store

In [None]:
# Reference to the "./data" path on the workspace blob store; this object is
# what the pipeline step receives as its input.
input_reference_settings = dict(
    datastore=def_blob_store,
    data_reference_name="input_data",
    path_on_datastore="./data",
)
blob_input_data = DataReference(**input_reference_settings)
print("DataReference object created: ", blob_input_data)

# Show the reference obtained by pointing at 'data' and requesting mount mode.
mounted_input_reference = blob_input_data.path('data').as_mount()
print(mounted_input_reference)

### Get (or create) compute target

In [None]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# Cluster settings. Each value can be overridden through an environment
# variable; the second argument is the default used when it is not set.
# choose a name for your cluster (other names used before: gpucluster, aml-compute, try-gpu)
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
# Environment variables are always strings, so the node counts are converted
# to int explicitly; otherwise an overridden value would be passed as a str.
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 1))  # 1 to get ready machine
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")


if compute_name in ws.compute_targets:
    # Reuse the compute target that already exists under this name.
    compute_target = ws.compute_targets[compute_name]
    if isinstance(compute_target, AmlCompute):
        print('found compute target. just use it. ' + compute_name)
    else:
        # The existing target is still used (as before), but no longer silently.
        print('WARNING: compute target ' + compute_name
              + ' exists but is not an AmlCompute cluster: '
              + type(compute_target).__name__)
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                min_nodes=compute_min_nodes,
                                                                max_nodes=compute_max_nodes)

    # create the cluster
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

### Prepare Pipeline

(Note: it is best to touch the base script, i.e. `pipeline_step_transform_data.py`, to ensure the latest version is uploaded to Azure,
e.g. by just changing a print statement.)

In [None]:
import os
import shutil
# NOTE(review): distutils is deprecated (PEP 632) and absent from Python 3.12+;
# kept here because the copied-files list returned by copy_tree is printed below.
from distutils.dir_util import copy_tree

# Staging folder holding everything that is uploaded as the step's source directory.
script_folder = os.path.join(os.getcwd(), "azure_upload_scripts")
os.makedirs(script_folder, exist_ok=True)

# Copy the local package into the staging folder, keeping its directory name.
ngamlfpy_package_name = 'ngamlfpy'
package_destination = os.path.join(script_folder, ngamlfpy_package_name)
files_in_ngamlfpy_copied = copy_tree(ngamlfpy_package_name, package_destination)

print('Files in package ', ngamlfpy_package_name, ' copied: ')
for copied_path in files_in_ngamlfpy_copied:
    print('  ', copied_path)

print(' ')
print('Script copied: ')
# Left as the final expression so the notebook shows the destination path.
shutil.copy(python_pipeline_script, script_folder)

## Define Pipeline parameters

In [None]:
# Pipeline parameter "ml_service" (default "TWV"); handed to the step when it is defined.
pipeline_param_ml_service = PipelineParameter(
    name="ml_service",
    default_value="TWV",
)
print("pipeline parameter created")

In [None]:
# Pipeline parameter "model_name" (default "T001"); handed to the step when it is defined.
pipeline_param_model_name = PipelineParameter(
    name="model_name",
    default_value="T001",
)
print("pipeline parameter created")

In [None]:
# Pipeline parameter "model_version" (default "001", kept as a string);
# handed to the step when it is defined.
pipeline_param_model_version = PipelineParameter(
    name="model_version",
    default_value="001",
)
print("pipeline parameter created")

In [None]:
# Pipeline parameter "pipeline_parameter" (default '23', kept as a string).
# NOTE(review): unlike the other parameters, this one is not included in the
# arguments of step1 in this notebook — confirm whether it is still needed.
pipeline_param_pipeline_param = PipelineParameter(
    name="pipeline_parameter",
    default_value='23',
)
print("pipeline parameter created")

In [None]:
# Pipeline parameter "json_input" (default "{}");
# handed to the step when it is defined.
pipeline_param_json_input = PipelineParameter(
    name="json_input",
    default_value="{}",
)
print("pipeline parameter created")

In [None]:
# Pipeline parameter "normalise_labels" (default 'N'); handed to the step when it is defined.
pipeline_param_normalise_labels = PipelineParameter(
    name="normalise_labels",
    default_value='N',
)
print("pipeline parameter created")

## Specify packages required etc

In [None]:
from azureml.core.runconfig import CondaDependencies, RunConfiguration

# Build the package list for the step's run environment.
cd = CondaDependencies()
cd.add_channel("conda-forge")

# Azure ML SDK packages are installed through pip.
for pip_package_name in ("azureml-pipeline-core", "azureml-core"):
    cd.add_pip_package(pip_package_name)

# Data-science libraries are installed through conda.
for conda_package_name in ("scikit-learn", "pandas", "matplotlib", "numpy", "requests"):
    cd.add_conda_package(conda_package_name)

# Run configuration carrying the dependencies above, executed inside Docker.
amlcompute_run_config = RunConfiguration(conda_dependencies=cd)
docker_settings = amlcompute_run_config.environment.docker
docker_settings.enabled = True
# NOTE(review): GPU support is switched on although the default VM size chosen
# in this notebook (STANDARD_D2_V2) is described as a CPU VM — confirm intent.
docker_settings.gpu_support = True



## Pipeline step: define python code to be published

In [None]:

# Command-line arguments for the script, written as flag/value pairs: the input
# data reference followed by the pipeline parameters.
step_arguments = [
    "--input_data", blob_input_data,
    "--ml_service", pipeline_param_ml_service,
    "--json_input", pipeline_param_json_input,
    "--model_name", pipeline_param_model_name,
    "--model_version", pipeline_param_model_version,
    "--normalise_labels", pipeline_param_normalise_labels,
]

# Single pipeline step: runs the script from the staged folder on the compute
# target, with the run configuration defined earlier and step reuse disabled.
step1 = PythonScriptStep(
    script_name=python_pipeline_script,
    arguments=step_arguments,
    inputs=[blob_input_data],
    outputs=[],  # this step declares no outputs
    compute_target=compute_target,
    source_directory=script_folder,
    runconfig=amlcompute_run_config,
    allow_reuse=False,
)
print("Process Step created")

## Create Azure Pipeline object

In [None]:
# Assemble the pipeline from its single step, then run the SDK's validation on it.
pipeline_steps = [step1]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
print("Pipeline is built")

pipeline.validate()
print("Simple validation complete")

## Publish pipeline to Azure

In [None]:
# Publish the pipeline under the configured name and description.
published_pipeline = pipeline.publish(
    name=published_pipeline_name,
    description=published_pipeline_description,
)
# Kept as the cell's final expression so the notebook displays the result.
published_pipeline