In [2]:
from azureml.core import Workspace, Environment, ScriptRunConfig, Experiment 
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

In [3]:
# Obtain the Workspace handle via Workspace.from_config(); every later cell
# (compute lookup, environment lookup, experiment) is scoped to this `ws`.
ws = Workspace.from_config()

# Creating a managed Compute target 

We may want to use cloud `Compute` resources for two reasons: 
- For large datasets and large models, a local machine may not have enough storage or computing power. For this we can use `compute instances` and/or `compute clusters`. 
- It is natural to use cloud compute for model deployment. For this we use `inference clusters`. 

Here we discuss managing a compute target with the first purpose in mind. Managing a compute target for deployment is discussed later. 

We use the `AmlCompute` class from `azureml-sdk` to provision the compute target. 

In [4]:
# Name under which the cluster is created in the workspace.
compute_name = 'rk-test-compute'

# Cluster description: VM size and an upper bound of two nodes.
provisioning_options = {'vm_size': 'STANDARD_DS11_V2', 'max_nodes': 2}
compute_config = AmlCompute.provisioning_configuration(**provisioning_options)

# Request the cluster, then wait for provisioning while streaming its status.
rk_cluster = ComputeTarget.create(
    workspace=ws,
    name=compute_name,
    provisioning_configuration=compute_config,
)
rk_cluster.wait_for_completion(show_output=True)

InProgress...
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


# Check for an existing compute target 

We can avoid creating an additional cluster if one already exists. To do this, we first try to retrieve the cluster by name and create it only if a `ComputeTargetException` is raised.  

In [5]:
try:
    # Look the cluster up by name; a ComputeTargetException is treated here
    # as "no such cluster yet".
    rk_cluster = ComputeTarget(workspace=ws, name=compute_name)
except ComputeTargetException:
    # Fall back to provisioning a new cluster with the same settings as before.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS11_V2',
        max_nodes=2,
    )
    rk_cluster = ComputeTarget.create(ws, compute_name, compute_config)
else:
    print('Found existing cluster')

# Runs in both cases: wait on the cluster and stream its status output.
rk_cluster.wait_for_completion(show_output=True)

Found existing cluster
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


# Use compute targets 

Now we set up a virtual environment on the compute target. Using the `ScriptRunConfig` object, we can configure a Python script to run on the cloud compute target with a specified virtual environment. 

In [6]:
# Look up the environment named 'training_environment' in the workspace.
training_env = Environment.get(workspace=ws, name='training_environment')

# Everything the run needs: which script, from which folder, in which
# environment, and on which (cloud) compute target.
run_settings = {
    'source_directory': '.',
    'script': 'experiment_1.py',
    'environment': training_env,
    'compute_target': rk_cluster,
}
script_config = ScriptRunConfig(**run_settings)

In [7]:
# From here on the flow matches the earlier example: open the experiment,
# submit the run configuration, and stream the logs until the run finishes.
experiment = Experiment(ws, 'exp-007')
run = experiment.submit(script_config)
run.wait_for_completion(show_output=True)

RunId: exp-007_1633455195_d27edee9
Web View: https://ml.azure.com/runs/exp-007_1633455195_d27edee9?wsid=/subscriptions/038a8790-7ab1-483b-abba-30f101e8dcce/resourcegroups/aml-resources-mstutorial/workspaces/aml-mstutorial&tid=68fda48c-5b34-479d-91f9-034da6f0efe3

Streaming azureml-logs/20_image_build_log.txt

2021/10/05 17:33:21 Downloading source code...
2021/10/05 17:33:22 Finished downloading source code
2021/10/05 17:33:22 Creating Docker network: acb_default_network, driver: 'bridge'
2021/10/05 17:33:22 Successfully set up Docker network: acb_default_network
2021/10/05 17:33:22 Setting up Docker configuration...
2021/10/05 17:33:23 Successfully set up Docker configuration
2021/10/05 17:33:23 Logging in to registry: 2c139677d5ec4bce96b537848a4e6382.azurecr.io
2021/10/05 17:33:24 Successfully logged into 2c139677d5ec4bce96b537848a4e6382.azurecr.io
2021/10/05 17:33:24 Executing step ID: acb_step_0. Timeout(sec): 5400, Working directory: '', Network: 'acb_default_network'
2021/10/05 1


libgfortran4-7.5.0   | 995 KB    |            |   0% 
libgfortran4-7.5.0   | 995 KB    | ########## | 100% 

tk-8.6.11            | 3.0 MB    |            |   0% 
tk-8.6.11            | 3.0 MB    | ########## | 100% 
tk-8.6.11            | 3.0 MB    | ########## | 100% 
Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing transaction: ...working... 

    Installed package of scikit-learn can be accelerated using scikit-learn-intelex.
    More details are available here: https://intel.github.io/scikit-learn-intelex

    For example:

        $ conda install scikit-learn-intelex
        $ python -m sklearnex my_application.py

    

done
Installing pip dependencies: ...working... 
Ran pip subprocess with arguments:
['/azureml-envs/azureml_63f39e4ec1a88630ccbd9626dd1e7495/bin/python', '-m', 'pip', 'install', '-U', '-r', '/azureml-environment-setup/condaenv.igg2etsp.requirements.txt']
Pip subprocess output:
Collecting azureml-defaults
  Downloading

Removing intermediate container f41be5050fba
 ---> 42b3d214813c
Step 9/19 : ENV PATH /azureml-envs/azureml_63f39e4ec1a88630ccbd9626dd1e7495/bin:$PATH
 ---> Running in c90bb1c433c5
Removing intermediate container c90bb1c433c5
 ---> 940296b8fb83
Step 10/19 : COPY azureml-environment-setup/send_conda_dependencies.py azureml-environment-setup/send_conda_dependencies.py
 ---> ab5f1599d40b
Step 11/19 : RUN echo "Copying environment context"
 ---> Running in 4a1ca81f4d3b
Copying environment context
Removing intermediate container 4a1ca81f4d3b
 ---> 2cbb974f0787
Step 12/19 : COPY azureml-environment-setup/environment_context.json azureml-environment-setup/environment_context.json
 ---> 97f1f9e67c3e
Step 13/19 : RUN python /azureml-environment-setup/send_conda_dependencies.py -p /azureml-envs/azureml_63f39e4ec1a88630ccbd9626dd1e7495
 ---> Running in c70fc219b30f
Report materialized dependencies for the environment
Reading environment context
Exporting conda environment
Sending request with mate


Streaming azureml-logs/75_job_post-tvmps_46bbc533ae2dfa42495cb121b9795258175da64abbc6bbc67d5417d68ce2eb22_d.txt

[2021-10-05T17:43:42.609793] Entering job release
[2021-10-05T17:43:43.623143] Starting job release
[2021-10-05T17:43:43.623681] Logging experiment finalizing status in history service.
Starting the daemon thread to refresh tokens in background for process with pid = 137
[2021-10-05T17:43:43.624145] job release stage : upload_datastore starting...
[2021-10-05T17:43:43.633320] Entering context manager injector.
[2021-10-05T17:43:43.633914] job release stage : start importing azureml.history._tracking in run_history_release.
[2021-10-05T17:43:43.635292] job release stage : upload_datastore completed...
[2021-10-05T17:43:43.635715] job release stage : execute_job_release starting...
[2021-10-05T17:43:43.636216] job release stage : copy_batchai_cached_logs starting...
[2021-10-05T17:43:43.636852] job release stage : copy_batchai_cached_logs completed...
[2021-10-05T17:43:43.739

{'runId': 'exp-007_1633455195_d27edee9',
 'target': 'rk-test-compute',
 'status': 'Completed',
 'startTimeUtc': '2021-10-05T17:42:33.094364Z',
 'endTimeUtc': '2021-10-05T17:43:57.720247Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': '07e5aafc-ab2a-4420-91b3-706b0e611f05',
  'azureml.git.repository_uri': 'https://github.com/roshankoirala/MLOps.git',
  'mlflow.source.git.repoURL': 'https://github.com/roshankoirala/MLOps.git',
  'azureml.git.branch': 'master',
  'mlflow.source.git.branch': 'master',
  'azureml.git.dirty': 'True',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'experiment_1.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': [],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'rk-test-compute',
  'dataReferences': {

# Delete compute cluster 

`Compute` resources are among the most expensive resources on the cloud, so it is a good idea to delete the cluster after use. We may also stop it instead, but it may still incur some charges. 

In [11]:
# Delete the cluster now that the run is finished, so it no longer costs money.
# NOTE(review): any later cell that uses `rk_cluster` will need it re-created first.
rk_cluster.delete()