In [17]:
import azureml.core
from azureml.core import Workspace

# Connect to the Azure ML workspace described by the saved config file and
# print the SDK version plus the workspace name as a quick sanity check.
ws = Workspace.from_config()
ready_message = 'Ready to use Azure ML {} to work with {}'
print(ready_message.format(azureml.core.VERSION, ws.name))

Ready to use Azure ML 1.26.0 to work with mlw_sdk


In [18]:
from azureml.core.compute import ComputeTarget
from azureml.core import Dataset, Environment

# Optional workspace inventory. Set the flag to True to print what the
# Dataset, ComputeTarget and Environment `list(ws)` calls return for this
# workspace. It is off by default, so a normal run skips these calls.
LIST_WORKSPACE_ASSETS = False

if LIST_WORKSPACE_ASSETS:
    # Plain loops are used instead of list comprehensions: printing is a side
    # effect, so there is no point building throwaway lists of None.
    print("Datasets:")
    for dataset in Dataset.list(ws):
        print(dataset.name)

    print("\nCompute Targets:")
    for compute in ComputeTarget.list(ws):
        print(compute.name)

    print("\nEnvironments:")
    # Each item yielded by Environment.list(ws) is printed as-is.
    for env in Environment.list(ws):
        print(env)

In [19]:
from azureml.core import RunConfiguration

# Compute target (looked up by name in the workspace) that the pipeline
# steps are given as their compute_target.
pipeline_compute = ComputeTarget(workspace=ws, name='Stand')

# Run configuration handed to the pipeline steps; its environment is the
# "sklearn_env" environment fetched from the workspace.
sklearn_env_test = Environment.get(workspace=ws, name="sklearn_env")
runconfig = RunConfiguration()
runconfig.environment = sklearn_env_test

In [20]:
from azureml.core import Experiment
# Experiment the pipeline is submitted to. The experiment name is also reused
# as the folder prefix for the step outputs (see pipeline_path).
experiment_name = 'Pipeline_first_try'
exp_run = Experiment(ws, experiment_name)

In [6]:
# Folder prefix for the step outputs on the datastore; same as the experiment name.
pipeline_path = experiment_name

# Input datasets, looked up by name in the workspace: one feeds the
# training-data scaling step, the other the test-data scaling step.
train_ds = Dataset.get_by_name(workspace=ws, name="feature_selected_train_FD001")
test_ds = Dataset.get_by_name(workspace=ws, name="feature_selected_test_FD001")

In [7]:
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps  import PythonScriptStep
from azureml.data import OutputFileDatasetConfig

In [8]:
# Output of step 1: an OutputFileDatasetConfig whose destination is the
# <pipeline_path>/output_datasets folder on the workspace's default datastore.
# NOTE(review): scaled_test_ds uses this same destination folder - confirm the
# two scaling steps cannot overwrite or pick up each other's files.
scaled_train_ds = OutputFileDatasetConfig(
    name="scaled_train_data",
    destination=(ws.get_default_datastore(), pipeline_path + "/output_datasets"),
)

# Step 1, scale training data: runs scale_training.py with the training
# dataset as a named input and scaled_train_ds as the output.
scale_train_args = [
    '--input-data', train_ds.as_named_input('feature_selected_train_FD001'),
    '--scaled-train-data', scaled_train_ds,
]
scale_train_step = PythonScriptStep(
    name="Scale Training Data",
    script_name="scale_training.py",
    source_directory='.',
    arguments=scale_train_args,
    compute_target=pipeline_compute,
    runconfig=runconfig,
    allow_reuse=True,
)

In [9]:
# Output of step 2: written to <pipeline_path>/output_models on the
# workspace's default datastore.
trained_model = OutputFileDatasetConfig(
    name="trained_data",
    destination=(ws.get_default_datastore(), pipeline_path + "/output_models"),
)

# Step 2, train data: runs train_data.py, consuming the output of step 1
# (scaled_train_ds) as input and writing to trained_model.
train_args = [
    '--scaled-train-data', scaled_train_ds.as_input(),
    '--trained-data', trained_model,
]
train_step = PythonScriptStep(
    name="Train Data",
    script_name="train_data.py",
    source_directory='.',
    arguments=train_args,
    compute_target=pipeline_compute,
    runconfig=runconfig,
    allow_reuse=True,
)

In [11]:
# Output of step 3: written to <pipeline_path>/output_datasets on the
# workspace's default datastore (the same folder scaled_train_ds targets).
scaled_test_ds = OutputFileDatasetConfig(
    name="scaled_test_data",
    destination=(ws.get_default_datastore(), pipeline_path + "/output_datasets"),
)

# Step 3, scale testing data: runs scale_test.py on the test dataset.
# The step-1 output is passed as --scaler; presumably that is where the
# fitted scaler was saved - confirm against scale_training.py.
scale_test_args = [
    '--input-data', test_ds.as_named_input('feature_selected_test_FD001'),
    '--scaler', scaled_train_ds.as_input(),
    '--scaled-test-data', scaled_test_ds,
]
scale_test_step = PythonScriptStep(
    name="Scale Testing Data",
    script_name="scale_test.py",
    source_directory='.',
    arguments=scale_test_args,
    compute_target=pipeline_compute,
    runconfig=runconfig,
    allow_reuse=True,
)

In [12]:
# Output of step 4: written to <pipeline_path>/predictions on the
# workspace's default datastore.
predict_test = OutputFileDatasetConfig(
    name="predict_test_data",
    destination=(ws.get_default_datastore(), pipeline_path + "/predictions"),
)

# Step 4, predict on test data: runs predict_test_data.py with the outputs of
# step 3 (scaled test data) and step 2 (trained model) as inputs.
predict_test_args = [
    '--scaled-test-data', scaled_test_ds.as_input(),
    '--trained-data', trained_model.as_input(),
    '--predicted-test-data', predict_test,
]
predict_test_step = PythonScriptStep(
    name="Predict on testing data",
    script_name="predict_test_data.py",
    source_directory='.',
    arguments=predict_test_args,
    compute_target=pipeline_compute,
    runconfig=runconfig,
    allow_reuse=True,
)

In [13]:
# Steps handed to the pipeline. predict_test_step is deliberately not in the
# list right now; append it here to run the prediction step as well.
steps = [
    scale_train_step,
    train_step,
    scale_test_step,
]

In [14]:
# Build the pipeline object for this workspace from the selected steps.
training_pipeline = Pipeline(ws, steps, description="Test")

In [15]:
# Submit the pipeline to the experiment and keep the returned run handle.
# NOTE(review): regenerate_outputs=False presumably lets steps with
# allow_reuse=True reuse earlier results - confirm against the SDK docs.
training_pipeline_run_v1 = exp_run.submit(
    training_pipeline,
    regenerate_outputs=False,
)

Created step Scale Training Data [24eab2ac][88d67644-d67a-45b5-a3cb-5a83fa04f849], (This step will run and generate new outputs)
Created step Train Data [1b01335b][e3aadc27-bf34-4e2e-9aff-7ed226fb1411], (This step will run and generate new outputs)
Created step Scale Testing Data [c39e4452][c4d56844-4415-458c-b25b-4897cb641a03], (This step will run and generate new outputs)
Submitted PipelineRun f1db20db-81c3-4e79-9d8b-458bfbcfd6e7
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/f1db20db-81c3-4e79-9d8b-458bfbcfd6e7?wsid=/subscriptions/bb345bae-e66f-461d-a1fe-67219d54a0f3/resourcegroups/rg-sandbox-kolbjorn/workspaces/mlw_sdk&tid=40cc2915-e283-4a27-9471-6bdd7ca4c6e1


In [16]:
from azureml.widgets import RunDetails

# Show the run-monitoring widget, then wait for the pipeline run to finish.
# The wait call is the cell's last expression, so its return value is displayed.
# NOTE(review): the output streamed by this cell includes signed log URLs
# (sig=... query strings) - consider clearing the output before sharing.
run_details_widget = RunDetails(training_pipeline_run_v1)
run_details_widget.show()
training_pipeline_run_v1.wait_for_completion()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRunId: f1db20db-81c3-4e79-9d8b-458bfbcfd6e7
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/f1db20db-81c3-4e79-9d8b-458bfbcfd6e7?wsid=/subscriptions/bb345bae-e66f-461d-a1fe-67219d54a0f3/resourcegroups/rg-sandbox-kolbjorn/workspaces/mlw_sdk&tid=40cc2915-e283-4a27-9471-6bdd7ca4c6e1
PipelineRun Status: Running


StepRunId: 5bc5bb3b-4989-47ee-94d7-15f25aa85685
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/5bc5bb3b-4989-47ee-94d7-15f25aa85685?wsid=/subscriptions/bb345bae-e66f-461d-a1fe-67219d54a0f3/resourcegroups/rg-sandbox-kolbjorn/workspaces/mlw_sdk&tid=40cc2915-e283-4a27-9471-6bdd7ca4c6e1
StepRun( Scale Training Data ) Status: NotStarted
StepRun( Scale Training Data ) Status: Running

Streaming azureml-logs/55_azureml-execution-tvmps_034c3ef52c0cf245127515e14911d3af66c5efe9beb19622a50fbe8268d30e98_d.txt
2021-04-30T07:14:36Z Successfully mounted a/an Blobfuse File System at /mnt/batch/tasks/shared/LS_root/jobs/mlw_sdk/azureml/5bc5bb3b-4989-47ee-

Saving Data...
Saving Scaler...


[2021-04-30T07:15:45.566225] The experiment completed successfully. Finalizing run...
Cleaning up all outstanding Run operations, waiting 900.0 seconds
2 items cleaning up...
Cleanup took 0.11700654029846191 seconds
[2021-04-30T07:15:45.903433] Finished context manager injector.
2021/04/30 07:15:51 Attempt 1 of http call to http://10.0.0.4:16384/sendlogstoartifacts/status
2021/04/30 07:15:51 Not exporting to RunHistory as the exporter is either stopped or there is no data.
Stopped: false
OriginalData: 2
FilteredData: 0.
2021/04/30 07:15:51 Process Exiting with Code:  0
2021/04/30 07:15:51 All App Insights Logs was send successfully

Streaming azureml-logs/75_job_post-tvmps_034c3ef52c0cf245127515e14911d3af66c5efe9beb19622a50fbe8268d30e98_d.txt
[2021-04-30T07:15:52.377092] Entering job release
[2021-04-30T07:15:53.743435] Starting job release
[2021-04-30T07:15:53.744119] Logging experiment finalizing status in history service.
Starting the daemon thread 




StepRunId: d35b40ca-f1b9-4d50-99e1-8c824c4dccd8
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/d35b40ca-f1b9-4d50-99e1-8c824c4dccd8?wsid=/subscriptions/bb345bae-e66f-461d-a1fe-67219d54a0f3/resourcegroups/rg-sandbox-kolbjorn/workspaces/mlw_sdk&tid=40cc2915-e283-4a27-9471-6bdd7ca4c6e1
StepRun( Scale Testing Data ) Status: Running

Streaming azureml-logs/55_azureml-execution-tvmps_034c3ef52c0cf245127515e14911d3af66c5efe9beb19622a50fbe8268d30e98_d.txt
2021-04-30T07:16:24Z Successfully mounted a/an Blobfuse File System at /mnt/batch/tasks/shared/LS_root/jobs/mlw_sdk/azureml/d35b40ca-f1b9-4d50-99e1-8c824c4dccd8/mounts/workspaceblobstore
2021-04-30T07:16:24Z Starting output-watcher...
2021-04-30T07:16:24Z IsDedicatedCompute == True, won't poll for Low Pri Preemption
2021-04-30T07:16:24Z Executing 'Copy ACR Details file' on 10.0.0.4
2021-04-30T07:16:24Z Copy ACR Details file succeeded on 10.0.0.4. Output: 
>>>   
>>>   
Login Succeeded
Using default tag: latest
latest: Pul


Streaming azureml-logs/70_driver_log.txt
2021/04/30 07:16:49 Starting App Insight Logger for task:  runTaskLet
2021/04/30 07:16:49 Attempt 1 of http call to http://10.0.0.4:16384/sendlogstoartifacts/info
2021/04/30 07:16:49 Attempt 1 of http call to http://10.0.0.4:16384/sendlogstoartifacts/status
[2021-04-30T07:16:50.204262] Entering context manager injector.
[context_manager_injector.py] Command line Options: Namespace(inject=['ProjectPythonPath:context_managers.ProjectPythonPath', 'Dataset:context_managers.Datasets', 'RunHistory:context_managers.RunHistory', 'TrackUserError:context_managers.TrackUserError'], invocation=['scale_test.py', '--input-data', '2d9400f7-25fa-41b9-ace2-62a90414caba', '--scaler', 'DatasetConsumptionConfig:input_fbc8bf91', '--scaled-test-data', 'DatasetOutputConfig:scaled_test_data'])
Script type = None
[2021-04-30T07:16:51.414373] Entering Run History Context Manager.
[2021-04-30T07:16:52.029593] Current directory: /mnt/batch/tasks/shared/LS_root/jobs/mlw_sd




StepRunId: 3f84bc75-201b-416c-b65c-562bea5918ec
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/3f84bc75-201b-416c-b65c-562bea5918ec?wsid=/subscriptions/bb345bae-e66f-461d-a1fe-67219d54a0f3/resourcegroups/rg-sandbox-kolbjorn/workspaces/mlw_sdk&tid=40cc2915-e283-4a27-9471-6bdd7ca4c6e1

StepRun(Train Data) Execution Summary
StepRun( Train Data ) Status: Finished
{'runId': '3f84bc75-201b-416c-b65c-562bea5918ec', 'target': 'Stand', 'status': 'Completed', 'startTimeUtc': '2021-04-30T07:16:24.619102Z', 'endTimeUtc': '2021-04-30T07:17:14.612593Z', 'properties': {'ContentSnapshotId': '6127b408-fe28-4888-94f9-21961470c1a4', 'StepType': 'PythonScriptStep', 'azureml.moduleid': 'e3aadc27-bf34-4e2e-9aff-7ed226fb1411', 'azureml.runsource': 'azureml.StepRun', 'azureml.nodeid': '1b01335b', 'azureml.pipelinerunid': 'f1db20db-81c3-4e79-9d8b-458bfbcfd6e7', '_azureml.ComputeTargetType': 'amlcompute', 'ProcessInfoFile': 'azureml-logs/process_info.json', 'ProcessStatusFile': 'azureml-log



PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': 'f1db20db-81c3-4e79-9d8b-458bfbcfd6e7', 'status': 'Completed', 'startTimeUtc': '2021-04-30T07:14:14.786949Z', 'endTimeUtc': '2021-04-30T07:17:24.455493Z', 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': 'SDK', 'runType': 'SDK', 'azureml.parameters': '{}'}, 'inputDatasets': [], 'outputDatasets': [], 'logFiles': {'logs/azureml/executionlogs.txt': 'https://mlwsdkstorage8e08cc56a99.blob.core.windows.net/azureml/ExperimentRun/dcid.f1db20db-81c3-4e79-9d8b-458bfbcfd6e7/logs/azureml/executionlogs.txt?sv=2019-02-02&sr=b&sig=vPeVtRGC6980jEt1B87ok13dyYzKlxV0ggGRNSy%2FD0Y%3D&st=2021-04-30T07%3A04%3A21Z&se=2021-04-30T15%3A14%3A21Z&sp=r', 'logs/azureml/stderrlogs.txt': 'https://mlwsdkstorage8e08cc56a99.blob.core.windows.net/azureml/ExperimentRun/dcid.f1db20db-81c3-4e79-9d8b-458bfbcfd6e7/logs/azureml/stderrlogs.txt?sv=2019-02-02&sr=b&sig=h7kWqX%2FO88fgrNn8yWIWOMoj9EeSGtvY50QVOzQOw%2FE%3D&st=2021-04-30T07%3A

'Finished'

In [None]:
# Publish the pipeline behind the finished run under a fixed name and
# version, then display the resulting object as the cell output.
published_pipeline = training_pipeline_run_v1.publish_pipeline(
    name="NASA-engine-training-pipeline",
    description="Trains NASA Engine Prediction model",
    version="1.0",
)

published_pipeline

In [None]:
# Read the endpoint attribute of the published pipeline and print it.
# This cell needs `published_pipeline` from the publishing cell above.
rest_endpoint = published_pipeline.endpoint
print(rest_endpoint)