In [21]:
import azureml.core
from azureml.core import Workspace

ws = Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

Ready to use Azure ML 1.38.0 to work with try1


In [22]:
from azureml.core import Dataset

default_ds = ws.get_default_datastore()

if 'diabetes' not in ws.datasets:
    default_ds.upload_files(files=['./data/diabetes.csv', './data/diabetes2.csv'], 
                        target_path='diabetes-data/', 
                        overwrite=True,
                        show_progress=True)


    tab_data_set = Dataset.Tabular.from_delimited_files(path=(default_ds, 'diabetes-data/*.csv'))

   
    try:
        tab_data_set = tab_data_set.register(workspace=ws, 
                                name='diabetes dataset',
                                description='diabetes data',
                                tags = {'format':'CSV'},
                                create_new_version=True)
        print('Dataset registered.')
    except Exception as ex:
        print(ex)
else:
    print('Dataset already registered.')

Dataset already registered.


In [23]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "dp100-cluster"

try:
    
    pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    try:
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2', max_nodes=2)
        pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        pipeline_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        print(ex)

Found existing cluster, use it.


In [24]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration


diabetes_env = Environment("diabetes-pipeline-env")
diabetes_env.python.user_managed_dependencies = False # Let Azure ML manage dependencies
diabetes_env.docker.enabled = True # Use a docker container


diabetes_packages = CondaDependencies.create(conda_packages=['scikit-learn','ipykernel','matplotlib','pandas','pip'],
                                             pip_packages=['azureml-defaults','azureml-dataprep[pandas]','pyarrow'])


diabetes_env.python.conda_dependencies = diabetes_packages


diabetes_env.register(workspace=ws)
registered_env = Environment.get(ws, 'diabetes-pipeline-env')


pipeline_run_config = RunConfiguration()


pipeline_run_config.target = pipeline_cluster


pipeline_run_config.environment = registered_env

print ("Run configuration created.")

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


Run configuration created.


In [25]:
import os
experiment_folder = 'diabetes_pipeline'
os.makedirs(experiment_folder, exist_ok=True)

print(experiment_folder)

diabetes_pipeline


In [26]:
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep


diabetes_ds = ws.datasets.get("diabetes")


prepped_data_folder = PipelineData("prepped_data_folder", datastore=ws.get_default_datastore())


train_step = PythonScriptStep(name = "Prepare Data",
                                source_directory = experiment_folder,
                                script_name = "prep_diabetes.py",
                                arguments = ['--input-data', diabetes_ds.as_named_input('raw_data'),
                                             '--prepped-data', prepped_data_folder],
                                outputs=[prepped_data_folder],
                                compute_target = pipeline_cluster,
                                runconfig = pipeline_run_config,
                                allow_reuse = True)


register_step = PythonScriptStep(name = "Train and Register Model",
                                source_directory = experiment_folder,
                                script_name = "train_diabetes.py",
                                arguments = ['--training-folder', prepped_data_folder],
                                inputs=[prepped_data_folder],
                                compute_target = pipeline_cluster,
                                runconfig = pipeline_run_config,
                                allow_reuse = True)

print("Pipeline steps defined")

Pipeline steps defined


In [27]:
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline
from azureml.widgets import RunDetails

# Construct the pipeline
pipeline_steps = [train_step, register_step]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
print("Pipeline is built.")

# Create an experiment and run the pipeline
experiment = Experiment(workspace=ws, name = 'mslearn-diabetes-pipeline')
pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
print("Pipeline submitted for execution.")
RunDetails(pipeline_run).show()
pipeline_run.wait_for_completion(show_output=True)

Pipeline is built.
Created step Prepare Data [922be93d][35c6faac-43f4-4ff4-8dec-678bcbe16715], (This step will run and generate new outputs)
Created step Train and Register Model [ca0a36ce][22ab5fa8-b88c-4169-88dc-b68f39a980e4], (This step will run and generate new outputs)
Submitted PipelineRun 7ccc8306-b45c-4424-ba05-740dd27dd4dd
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/7ccc8306-b45c-4424-ba05-740dd27dd4dd?wsid=/subscriptions/0dca8f46-772e-4ec6-b64e-e22adfa4ec53/resourcegroups/practise1/workspaces/try1&tid=e4e34038-ea1f-4882-b6e8-ccd776459ca0
Pipeline submitted for execution.


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRunId: 7ccc8306-b45c-4424-ba05-740dd27dd4dd
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/7ccc8306-b45c-4424-ba05-740dd27dd4dd?wsid=/subscriptions/0dca8f46-772e-4ec6-b64e-e22adfa4ec53/resourcegroups/practise1/workspaces/try1&tid=e4e34038-ea1f-4882-b6e8-ccd776459ca0
PipelineRun Status: Running


StepRunId: 000f4576-4d60-4690-8ecd-3f17896a4a1f
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/000f4576-4d60-4690-8ecd-3f17896a4a1f?wsid=/subscriptions/0dca8f46-772e-4ec6-b64e-e22adfa4ec53/resourcegroups/practise1/workspaces/try1&tid=e4e34038-ea1f-4882-b6e8-ccd776459ca0
StepRun( Prepare Data ) Status: NotStarted
StepRun( Prepare Data ) Status: Running

StepRun(Prepare Data) Execution Summary
StepRun( Prepare Data ) Status: Finished
{'runId': '000f4576-4d60-4690-8ecd-3f17896a4a1f', 'target': 'dp100-cluster', 'status': 'Completed', 'startTimeUtc': '2022-02-14T14:19:14.829079Z', 'endTimeUtc': '2022-02-14T14:20:59.370825Z', 'services': {}, 'properties': {

'Finished'

In [28]:
for run in pipeline_run.get_children():
    print(run.name, ':')
    metrics = run.get_metrics()
    for metric_name in metrics:
        print('\t',metric_name, ":", metrics[metric_name])

Train and Register Model :
	 Accuracy : 0.886
	 AUC : 0.8723817105328623
	 ROC : aml://artifactId/ExperimentRun/dcid.c66d484a-2e45-4387-a2e9-f0b22b3ab3a0/ROC_1644848473.png
Prepare Data :
	 raw_rows : 10000
	 processed_rows : 10000


In [29]:
from azureml.core import Model

for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name in model.tags:
        tag = model.tags[tag_name]
        print ('\t',tag_name, ':', tag)
    for prop_name in model.properties:
        prop = model.properties[prop_name]
        print ('\t',prop_name, ':', prop)
    print('\n')

diabetes_model version: 5
	 Training context : Pipeline
	 AUC : 0.8723817105328623
	 Accuracy : 0.886


diabetes_model version: 4
	 Training context : Inline Training
	 AUC : 0.8761132394646499
	 Accuracy : 0.8893333333333333


diabetes_model version: 3
	 Training context : Inline Training
	 AUC : 0.8773695208716851
	 Accuracy : 0.891


diabetes_model version: 2
	 Training context : Inline Training
	 AUC : 0.8768744713667346
	 Accuracy : 0.8906666666666667


diabetes_model version: 1
	 Training context : Pipeline
	 AUC : 0.8751455296283397
	 Accuracy : 0.8896666666666667


