In [1]:
# Folder that holds the experiment files used in Challenge 2
training_folder = "driver-training"

## register_model.py
This script takes the folder where the trained model was saved, uploads the model file to the run, and registers it in the workspace. It will be the second step in the pipeline. For convenience, the script is written to the experiment folder directly from this notebook.

In [2]:
%%writefile $training_folder/register_model.py
# Import libraries
import argparse
import os

from azureml.core import Run

# Get parameters
parser = argparse.ArgumentParser()
parser.add_argument('--model_folder', type=str, dest='model_folder',
                    default="driver-training", help='model location')
args = parser.parse_args()
model_folder = args.model_folder

# Get the experiment run context
run = Run.get_context()

# Locate the serialized model file inside the model folder
# (nothing is deserialized here; the file is only uploaded and registered)
print("Loading model from " + model_folder)
model_name = 'porto_seguro_safe_driver_model'
model_file = os.path.join(model_folder, model_name + ".pkl")

# Get metrics for registration
## HINT: Try storing the metrics in the parent run, which will be
##       accessible during both the training and registration
##       child runs using the 'run.parent' API.
# Guard against a run without a parent so the script registers the model
# with no tags instead of failing with an AttributeError on None.
parent_run = getattr(run, 'parent', None)
metrics = parent_run.get_metrics() if parent_run is not None else {}

# Model tags are documented as a string-to-string mapping, while logged
# metrics may be floats, lists or tables; stringify them before tagging.
tags = {str(name): str(value) for name, value in metrics.items()}

# Register the model
run.upload_file(model_name, model_file)
run.register_model(
    model_path=model_name,
    model_name=model_name,
    tags=tags)

run.complete()


Overwriting driver-training/register_model.py


## Create an Azure Machine Learning Pipeline to Run the Scripts as a Pipeline

See [this tutorial](https://github.com/MicrosoftDocs/mslearn-aml-labs/blob/master/05-Creating_a_Pipeline.ipynb) for a starting point.

Use the scikit-learn and lightgbm conda packages when defining the environment for the pipeline.

In [3]:
import azureml.core
from azureml.core import Workspace

# Load the workspace via Workspace.from_config(); `ws` is reused by the
# compute, environment, pipeline and model-listing cells that follow.
ws = Workspace.from_config()

Failure while loading azureml_run_type_providers. Failed to load entrypoint automl = azureml.train.automl.run:AutoMLRun._from_run_dto with exception (portalocker 2.0.0 (/anaconda/envs/azureml_py36/lib/python3.6/site-packages), Requirement.parse('portalocker~=1.0'), {'msal-extensions'}).


In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the compute cluster that runs every pipeline step
cluster_name = "mlops-cc"

try:
    # Check for existing compute target
    pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # If it doesn't already exist, create it
    try:
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2', max_nodes=2)
        pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        pipeline_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        # Show the provisioning error, then re-raise: later cells require
        # `pipeline_cluster`, so swallowing the failure here would only
        # surface later as an unrelated NameError.
        print(ex)
        raise

Found existing cluster, use it.


In [5]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration

# Package dependencies needed by the pipeline steps
driver_packages = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pandas', 'lightgbm'],
    pip_packages=['azureml-defaults', 'azureml-dataprep[pandas]'],
)

# Python environment for the experiment: Azure ML manages the dependencies
# and the steps run inside a docker container
driver_env = Environment("driver-pipeline-env")
driver_env.python.conda_dependencies = driver_packages
driver_env.python.user_managed_dependencies = False
driver_env.docker.enabled = True

# Register the environment so it can be reused, then fetch the registered copy
driver_env.register(workspace=ws)
registered_env = Environment.get(ws, 'driver-pipeline-env')

# Run configuration shared by the pipeline: registered environment on the
# compute cluster selected earlier
pipeline_run_config = RunConfiguration()
pipeline_run_config.environment = registered_env
pipeline_run_config.target = pipeline_cluster

print("Run configuration created.")

Run configuration created.


In [6]:
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.train.estimator import Estimator

# Get the training dataset
driver_ds = ws.datasets.get("driver dataset")
if driver_ds is None:
    # dict.get returns None for an unregistered name; fail here with a clear
    # message instead of an AttributeError when the train step is built.
    raise ValueError('Dataset "driver dataset" is not registered in the workspace.')

# Create a PipelineData (temporary Data Reference) for the model folder;
# it is the output of the training step and the input of the registration step
model_folder = PipelineData("model_folder", datastore=ws.get_default_datastore())

# Create Estimator to train the model as in Challenge 2
estimator = Estimator(source_directory=training_folder,
                      entry_script='driver_training.py',
                      compute_target=pipeline_cluster,
                      environment_definition=pipeline_run_config.environment)

# Create Step 1, which runs the estimator to train the model
train_step = EstimatorStep(name = "Train Model",
                           estimator=estimator, 
                           estimator_entry_script_arguments=['--output_folder', model_folder],
                           inputs=[driver_ds.as_named_input('driver_train')],
                           outputs=[model_folder],
                           compute_target = pipeline_cluster,
                           allow_reuse = True)

# Create Step 2, which runs the model registration script
register_step = PythonScriptStep(name = "Register Model",
                                source_directory = training_folder,
                                script_name = "register_model.py",
                                arguments = ['--model_folder', model_folder],
                                inputs=[model_folder],
                                compute_target = pipeline_cluster,
                                runconfig = pipeline_run_config,
                                allow_reuse = True)
print("Pipeline steps defined")

Pipeline steps defined


In [7]:
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline
from azureml.widgets import RunDetails

# Assemble the training and registration steps into a pipeline
pipeline_steps = [train_step, register_step]
pipeline = Pipeline(steps=pipeline_steps, workspace=ws)
print("Pipeline is built.")

# Submit the pipeline under its own experiment, forcing fresh step outputs,
# and block until the run finishes
experiment = Experiment(name='driver-training-pipeline', workspace=ws)
pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
print("Pipeline submitted for execution.")
pipeline_run.wait_for_completion(show_output=True)

Pipeline is built.
Created step Train Model [743c947f][26dc8131-f09f-4352-a1f4-178976a9b395], (This step will run and generate new outputs)
Created step Register Model [7928f698][a7575e1b-ceb9-4d54-9fad-5bd20798b744], (This step will run and generate new outputs)
Submitted PipelineRun c25e809f-4b13-43b4-ac4e-2fad95ecbf79
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/driver-training-pipeline/runs/c25e809f-4b13-43b4-ac4e-2fad95ecbf79?wsid=/subscriptions/7c9d382c-5964-48db-9cf6-c595c7ba4339/resourcegroups/mlops-rg/workspaces/mlops-aml
Pipeline submitted for execution.
PipelineRunId: c25e809f-4b13-43b4-ac4e-2fad95ecbf79
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/driver-training-pipeline/runs/c25e809f-4b13-43b4-ac4e-2fad95ecbf79?wsid=/subscriptions/7c9d382c-5964-48db-9cf6-c595c7ba4339/resourcegroups/mlops-rg/workspaces/mlops-aml
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: c4ad3808-73c9-431d-b90d-99cf99e66e67


'Finished'

In [8]:
# List every registered model with its version, tags and properties
from azureml.core import Model

for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    # Tags first, then properties, one indented "name : value" line each
    for tag_name, tag in model.tags.items():
        print('\t', tag_name, ':', tag)
    for prop_name, prop in model.properties.items():
        print('\t', prop_name, ':', prop)
    print('\n')

porto_seguro_safe_driver_model version: 3
	 learning_rate : 0.02
	 boosting_type : gbdt
	 objective : binary
	 metric : auc
	 sub_feature : 0.7
	 num_leaves : 60
	 min_data : 100
	 min_hessian : 1
	 verbose : 0
	 auc : 0.6377511613946426


driver_model.pkl version: 2
	 auc : 0.6377511613946426


porto_seguro_safe_driver_model version: 2
	 learning_rate : 0.02
	 boosting_type : gbdt
	 metric : auc
	 objective : binary
	 sub_feature : 0.7
	 min_data : 100
	 num_leaves : 60
	 min_hessian : 1
	 verbose : 0
	 auc : 0.6377511613946426


porto_seguro_safe_driver_model version: 1


driver_model.pkl version: 1
	 auc : 0.6377511613946426


