# Versioning a Pipeline

In the last two labs, you [created](labdocs/Lab06A.md) a pipeline and [published](labdocs/Lab06B.md) it as a service.  Now you're going to learn about versioning a published pipeline.

## Connect to Your Workspace

The first thing you need to do is to connect to your workspace using the Azure ML SDK.

> **Note**: If the authenticated session with your Azure subscription has expired since you completed the previous exercise, you'll be prompted to reauthenticate.

In [None]:
import azureml.core
from azureml.core import Workspace

# Connect to the workspace described by the saved config file
ws = Workspace.from_config()

# Report the SDK version and the workspace name as a quick sanity check
sdk_version = azureml.core.VERSION
ready_message = 'Ready to use Azure ML {} to work with {}'.format(sdk_version, ws.name)
print(ready_message)

## Create Scripts for Pipeline Steps

In [None]:
import os
# Local folder that holds the script files for the pipeline steps
experiment_folder = 'versioning'

# Create the folder; a folder that already exists is reused without error
os.makedirs(name=experiment_folder, exist_ok=True)
print(experiment_folder)

In [None]:
%%writefile $experiment_folder/train.py
# Import libraries
import argparse


def main(argv=None):
    """Parse the script arguments and run the placeholder training routine.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default, used by the ``main()`` call at the bottom of the
            script), argparse reads the process command line instead.
            Passing a list lets the function be driven programmatically.
    """
    parser = argparse.ArgumentParser()
    # argparse already yields a str and derives dest from the option name,
    # so neither needs to be spelled out.
    parser.add_argument('--some_parameter', default="abc", help='Some parameter value')
    args = parser.parse_args(argv)

    print(f"Argument some_parameter: {args.some_parameter}")

    print("Here we would train a model...")

    print("Done!")
    
    
if __name__ == "__main__":
    main()


## Prepare a Compute Environment for the Pipeline Steps

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "aml-cluster"

# Reuse the compute cluster if the workspace already has one with this name;
# otherwise provision a new one.
try:
    pipeline_cluster = ComputeTarget(name=cluster_name, workspace=ws)
except ComputeTargetException:
    # No such cluster yet: create it
    compute_config = AmlCompute.provisioning_configuration(
        max_nodes=2,
        vm_size='STANDARD_DS2_V2',
    )
    pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Wait on the cluster (new or existing) and show the progress output
pipeline_cluster.wait_for_completion(show_output=True)

## Compose and Publish the Pipeline

In [None]:
from azureml.core import Experiment
from azureml.core.runconfig import RunConfiguration
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import PythonScriptStep


# Run configuration for the pipeline, targeting the compute prepared above
pipeline_run_config = RunConfiguration()
pipeline_run_config.target = pipeline_cluster

# Single pipeline step: run train.py from the experiment folder on the cluster
step = PythonScriptStep(
    script_name='train.py',
    source_directory=experiment_folder,
    name='training',
    runconfig=pipeline_run_config,
    compute_target=pipeline_cluster,
)

# Assemble the pipeline from its steps and publish it as version 1
pipeline_steps = [step]
pipeline = Pipeline(steps=pipeline_steps, workspace=ws)
pipeline.publish(
    name='Model training pipeline',
    version='1',
    description='',
)
print("Pipeline published!")

# Submit the same pipeline as a run of the 'model-training' experiment
experiment_name = 'model-training'
experiment = Experiment(workspace=ws, name=experiment_name)
experiment.submit(pipeline)
print("Experiment submitted!")

## Publishing - a better way

### Specify the pipeline run you want to publish

In [None]:
from azureml.pipeline.core import PipelineRun

# Look up the experiment the pipeline was submitted under
pipeline_experiment = ws.experiments.get(experiment_name)

# Option A (disabled): pick the first completed run returned for the experiment
# pipeline_run = None
# for run in pipeline_experiment.get_runs():
#     if run.status == 'Completed':
#         pipeline_run = run
#         break

# Option B: address one specific run by its ID.
# NOTE(review): this ID presumably exists only in the author's workspace;
# replace it with a run ID from your own experiment.
selected_run_id = '17856a2c-0e35-442e-958c-7bdd0f203b26'
pipeline_run = PipelineRun(pipeline_experiment, run_id=selected_run_id)
print(pipeline_run)

### Create a PipelineEndpoint and use it as a facade for published pipelines

In [None]:
from azureml.pipeline.core import PipelineEndpoint, PublishedPipeline

pipeline_version = '2.0'
pipeline_name_prefix = 'Model training pipeline'
pipeline_name = f"{pipeline_name_prefix} - version {pipeline_version}"

# Look for an already published pipeline of this family that carries the
# requested version; None when there is no such pipeline.
found_published_pipeline = next(
    (
        candidate
        for candidate in PublishedPipeline.list(ws)
        if candidate.name.startswith(pipeline_name_prefix)
        and candidate.version == pipeline_version
    ),
    None,
)

if found_published_pipeline is not None:
    print("Found the version, about to add the specified pipeline to pipeline endpoint...")
    print(found_published_pipeline)

    # Publish a new edition of this pipeline version from the pipeline run
    # selected above (the same source the "not found" branch uses), so the
    # edition added to the endpoint really is the specified run.
    newly_published_pipeline = pipeline_run.publish_pipeline(
        name=pipeline_name,
        description="Trains model",
        version=pipeline_version)

    # Make it the default edition of the existing endpoint (preserving URL address)
    pipeline_endpoint = PipelineEndpoint.get(workspace=ws, name=pipeline_name)
    pipeline_endpoint.add_default(newly_published_pipeline)

else:
    print("Not found, about to publish a new version...")

    # Publish the selected pipeline run under the versioned name
    published_pipeline = pipeline_run.publish_pipeline(
        name=pipeline_name,
        description="Trains model",
        version=pipeline_version)

    # Create the endpoint that acts as a facade for the published pipeline
    pipeline_endpoint = PipelineEndpoint.publish(
        workspace=ws,
        name=pipeline_name,
        description="Trains model",
        pipeline=published_pipeline)

### List all published pipelines

In [None]:
# Print every published pipeline returned for the workspace, followed by its
# version and a blank separator line
all_published_pipelines = PublishedPipeline.list(ws)
for published_pipeline in all_published_pipelines:
    print(published_pipeline)
    print("Version: ", published_pipeline.version, end="\n\n")

### List Pipeline Endpoints (active and inactive)

In [None]:
# Print each pipeline endpoint followed by a blank separator line.
# active_only=False asks for the listing not to be limited to active endpoints.
all_endpoints = PipelineEndpoint.list(ws, active_only=False)
for endpoint in all_endpoints:
    print(endpoint, end="\n\n")

## Use the Pipeline Endpoint

In [None]:
from azureml.core.authentication import InteractiveLoginAuthentication

# Authenticate interactively and obtain the authentication header that is
# sent with the REST request in the next cell.
interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()
print("Authentication header ready.")

In [None]:
import requests

# Take the submission URL from the PipelineEndpoint object created above
# instead of assembling it by hand. The hand-built URL referenced undefined
# names (subscription_id, resource_group_name) and hard-coded the region,
# workspace name and endpoint ID of one particular environment.
rest_endpoint = pipeline_endpoint.endpoint

# Submit a run of the endpoint's default pipeline under the experiment name
response = requests.post(rest_endpoint,
                         headers=auth_header,
                         json={"ExperimentName": experiment_name})

# Surface an HTTP error here rather than as a KeyError on "Id" below
response.raise_for_status()
run_id = response.json()["Id"]
run_id