In [1]:
import configparser

from azureml.core import Workspace
from azureml.core.authentication import AzureCliAuthentication
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.pipeline.core._restclients.aeva.models.error_response import ErrorResponseException
from azureml.pipeline.core import Pipeline, PipelineEndpoint
from azureml.pipeline.steps import PythonScriptStep

# 1.0 General reference

- [Reference for multi class](https://github.com/Azure/azureml-examples/tree/main/python-sdk/tutorials/automl-with-azureml/image-classification-multiclass)
- [Reference for multi label](https://github.com/Azure/azureml-examples/tree/main/python-sdk/tutorials/automl-with-azureml/image-classification-multilabel)
- [Reference for object detection](https://github.com/Azure/azureml-examples/blob/main/python-sdk/tutorials/automl-with-azureml/image-object-detection/auto-ml-image-object-detection.ipynb)

In [2]:
## 1.0.1 Retrieve configuration file
## ConfigParser.read() does not raise when the file is missing or unreadable;
## it skips the file and returns the list of files it actually parsed.
## Check that list so a wrong working directory fails here with the path,
## rather than later as a confusing NoSectionError on the first .get().
config_path = './common/config.ini'
config_ini = configparser.ConfigParser()
if not config_ini.read(config_path, encoding='utf-8'):
    raise FileNotFoundError(
        'Configuration file could not be read: ' + config_path
    )

## All values below are returned by ConfigParser.get() as strings.

## 1.0.2 Basic Azure parameters ([Azure] section)
subscription_id = config_ini.get('Azure', 'subscription_id')
resource_group = config_ini.get('Azure', 'resource_group')
workspace_name = config_ini.get('Azure', 'workspace_name')

## 1.0.3 Data ([data] section)
train_ratio = config_ini.get('data', 'train_ratio')
dataset_name = config_ini.get('data', 'dataset_name')
dataset_name_for_train = config_ini.get('data', 'dataset_name_for_train')
dataset_name_for_test = config_ini.get('data', 'dataset_name_for_test')

## 1.0.4 Azure ML parameters ([AML] section)
cluster_name = config_ini.get('AML', 'cluster_name')
vm_size = config_ini.get('AML', 'vm_size')
vm_location = config_ini.get('AML', 'vm_location')
managed_id = config_ini.get('AML', 'managed_id')
image_analysis_type = config_ini.get('AML', 'image_analysis_type')
random_seed = config_ini.get('AML', 'random_seed')

## 1.0.5 Parameters for image classification ([AML] section)
experiment_name = config_ini.get('AML', 'experiment_name')
base_model = config_ini.get('AML', 'base_model')
pipelineName = config_ini.get('AML', 'pipelineName')
model_name = config_ini.get('AML', 'model_name')

# 1.1 Authentication

In [3]:
## 1.1.1 Retrieve AML workspace with CLI authentication
## The workspace is identified by the subscription, resource group and
## workspace name read from the configuration file.
cli_auth = AzureCliAuthentication()
ws = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
    auth=cli_auth,
)

# 1.2 Define compute target

In [4]:
## 1.2.1 Define compute target
## AutoML image models need an NC-series VM size rather than NV-series.
## Reuse the cluster named `cluster_name` if the workspace already has it;
## a KeyError from the lookup means it does not exist yet, so create it.
try:
    compute_target = ws.compute_targets[cluster_name]
except KeyError:
    print("Creating a new compute target...")
    compute_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        location=vm_location,
        identity_type=managed_id,
        min_nodes=0,
        max_nodes=4,
        idle_seconds_before_scaledown=600,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print("Found existing compute target.")

## Wait (up to 20 minutes) for the compute target to be ready.
## With min_node_count=None the cluster's own scale settings are used
## instead of polling for a specific number of nodes.
compute_target.wait_for_completion(show_output=True,
                                   min_node_count=None,
                                   timeout_in_minutes=20)

Found existing compute target.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


# 1.3 Run-Configuration

In [5]:
## 1.3.1 Run configuration
## Target the `compute_target` obtained in section 1.2.
aml_run_config = RunConfiguration()
aml_run_config.target = compute_target

## 1.3.2 Conda/pip packages for the run environment
## (pandas and scikit-learn from conda; the Azure ML packages from pip).
aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(
    pin_sdk_version=False,
    conda_packages=["pandas", "scikit-learn"],
    pip_packages=["azureml-sdk", "azureml-automl-core"],
)

# 1.4 Generate AML pipeline for training

In [80]:
## 1.4.1 Define the training step
## train.py (taken from the current directory) is run on `compute_target`
## with `aml_run_config`. Every configuration value is handed to the script
## as a "--flag value" pair; the pairs are listed one per line below.
trainStep = PythonScriptStep(
    script_name="train.py",
    source_directory='.',
    compute_target=compute_target,
    runconfig=aml_run_config,
    allow_reuse=True,
    arguments=[
        "--subscription_id", subscription_id,
        "--resource_group", resource_group,
        "--workspace_name", workspace_name,
        "--cluster_name", cluster_name,
        "--experiment_name", experiment_name,
        "--base_model", base_model,
        "--dataset_name", dataset_name,
        "--dataset_name_for_train", dataset_name_for_train,
        "--dataset_name_for_test", dataset_name_for_test,
        "--train_ratio", train_ratio,
        "--random_seed", random_seed,
        "--model_name", model_name,
        "--image_analysis_type", image_analysis_type,
    ],
)

## 1.4.2 Define Azure ML Pipeline (a single step: the training step)
pipeline = Pipeline(workspace=ws, steps=[trainStep])

In [81]:
## 1.4.3 Publish the pipeline, or add a new version if the endpoint exists
## Adding a version to an existing endpoint (instead of creating a new
## endpoint) keeps the endpoint's REST URI.
try:
    pipelineEndpoint = PipelineEndpoint.get(workspace=ws, name=pipelineName)
except ErrorResponseException as ex:
    # Only a "not found" response means the endpoint is absent;
    # any other service error is propagated unchanged.
    if "not found in workspace" not in ex.message:
        raise
    pipelineEndpoint = None

if pipelineEndpoint is not None:
    # Existing endpoint: publish this pipeline and make it the default version.
    print('Found existing pipeline ' + pipelineName + ', adding new version.')
    published_pipeline = pipeline.publish(name=pipelineName + "_Pipeline")
    pipelineEndpoint.add_default(published_pipeline)
else:
    # No endpoint yet: create one from this pipeline.
    print('Pipeline does not exists, creating new: ' + pipelineName)
    pipelineEndpoint = PipelineEndpoint.publish(
        workspace=ws,
        name=pipelineName,
        pipeline=pipeline,
        description="My Published Pipeline Description.",
    )

Found existing pipeline object_detection_pipeline, adding new version.
Created step train.py [0080747b][c63f95ad-90a8-40ec-b605-4d59b3ca0cf9], (This step will run and generate new outputs)
