# NVIDIA GPU Cloud (NGC) applications on Azure Machine Learning

### Imports

In [1]:
from Azuremlcomputecluster import AzureMLComputeCluster
import ngccontent
from azureml.core import Workspace, Experiment, Datastore, Dataset, Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import MpiConfiguration
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.authentication import InteractiveLoginAuthentication

import os
from IPython.core.display import display, HTML

### Setup

#### Workspace

Documentation: [Workspace](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.workspace.workspace?view=azure-ml-py)

In [2]:
# Load the notebook configuration and pick out the Azure ML account settings.
configdata = ngccontent.get_config("ngc_app_config.json")
azureml_user = configdata["azureml_user"]

subscription_id = azureml_user["subscription_id"]
resource_group = azureml_user["resource_group"]
workspace_name = azureml_user["workspace_name"]

# Construct the Workspace object from those three settings.
ws = Workspace(
    workspace_name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
)

# Echo the values back so they can be checked by eye before later cells run.
verify = f'''
Subscription ID: {subscription_id}
Resource Group: {resource_group}
Workspace: {workspace_name}
'''
print(verify)


Subscription ID: ab221ca4-f098-422d-ab2f-5073b3851e68
Resource Group: TutorialTestA
Workspace: TutorialTestA1



In [3]:
# All values below come from the "aml_compute" section of the config file,
# except the vnet resource group, which is taken from the workspace itself.
aml_compute = configdata["aml_compute"]

### vnet settings
vnet_rg = ws.resource_group
vnet_name = aml_compute["vnet_name"]
subnet_name = aml_compute["subnet_name"]

### azure ml names
ct_name = aml_compute["ct_name"]
exp_name = aml_compute["exp_name"]

### trust but verify
verify = f'''
vNET RG: {vnet_rg}
vNET name: {vnet_name}
vNET subnet name: {subnet_name}
Compute target: {ct_name}
Experiment name: {exp_name}
'''
print(verify)


vNET RG: TutorialTestA
vNET name: TutorialTestA-vnet
vNET subnet name: default
Compute target: clara-ct
Experiment name: clara_setup



#### Compute target
Documentation: [Compute target](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.compute.amlcompute(class)?view=azure-ml-py)

In [4]:
# Create the AmlCompute target named `ct_name`, or reuse it if the workspace
# already has one with that name.
#
# The requested VM size is read up front so that it is defined on every path,
# including the error path, and is validated against the keys of
# configdata["supported_vm_sizes"]. The value stored under each key is used
# as the GPU count per node for that size.
vm_name = configdata["aml_compute"]["vm_name"]
supported_vm_sizes = configdata["supported_vm_sizes"]

if vm_name not in supported_vm_sizes:
    print("Unsupported vm_size {vm_size}".format(vm_size=vm_name))
    print("The specified vm size must be one of ...")
    for azure_gpu_vm_size in supported_vm_sizes.keys():
        print("... " + azure_gpu_vm_size)
    # Raise only after the full list has been printed.
    raise Exception("{vm_size} does not support Pascal or above GPUs".format(vm_size=vm_name))

gpus_per_node = supported_vm_sizes[vm_name]

print("Setting up compute target {ct_name} with vm_size: {vm_name} with {gpus_per_node} GPUs".format(ct_name=ct_name,vm_name=vm_name,gpus_per_node=gpus_per_node))

if ct_name not in ws.compute_targets:
    config = AmlCompute.provisioning_configuration(
        vm_size=vm_name,
        min_nodes=configdata["aml_compute"]["min_nodes"],
        max_nodes=configdata["aml_compute"]["max_nodes"],
        vnet_resourcegroup_name=vnet_rg,
        vnet_name=vnet_name,
        subnet_name=subnet_name,
        idle_seconds_before_scaledown=configdata["aml_compute"]["idle_seconds_before_scaledown"],
        # NOTE(review): 'Enabled' presumably leaves the nodes' remote login
        # (SSH) port publicly reachable - confirm this is intended.
        remote_login_port_public_access='Enabled',
    )
    ct = ComputeTarget.create(ws, ct_name, config)
    # Block until provisioning finishes so later cells can rely on `ct`.
    ct.wait_for_completion(show_output=True)
else:
    print("Loading Pre-existing Compute Target {ct_name}".format(ct_name=ct_name))
    ct = ws.compute_targets[ct_name]

Setting up compute target clara-ct with vm_size: Standard_NC24s_v3 with 4 GPUs
Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


#### Environment
Documentation: [Environment](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.environment?view=azure-ml-py)

In [5]:
# NOTE(review): ContainerRegistry is not referenced in the visible cells; the
# import is kept here in case a later cell relies on it.
from azureml.core import ContainerRegistry

# Register the Azure ML Environment named in the config, or load it from the
# workspace when an environment with that name is already registered.
environment_name = configdata["aml_compute"]["environment_name"]
python_interpreter = configdata["aml_compute"]["python_interpreter"]
conda_packages = configdata["aml_compute"]["conda_packages"]

if environment_name not in ws.environments:
    env = Environment(name=environment_name)
    env.docker.enabled = configdata["aml_compute"]["docker_enabled"]
    # Build from a one-line Dockerfile ("FROM <configured image>") rather than
    # from a base image name, so base_image is explicitly cleared.
    env.docker.base_image = None
    env.docker.base_dockerfile = "FROM {dockerfile}".format(dockerfile=configdata["ngc_content"]["base_dockerfile"])
    env.python.interpreter_path = python_interpreter
    # NOTE(review): with user_managed_dependencies=True the SDK presumably uses
    # the interpreter already in the image and does not build a conda
    # environment, so the packages added below may have no effect - confirm.
    env.python.user_managed_dependencies = True

    conda_dep = CondaDependencies()

    for conda_package in conda_packages:
        conda_dep.add_conda_package(conda_package)

    env.python.conda_dependencies = conda_dep
    env.register(workspace=ws)
else:
    env = ws.environments[environment_name]

#### Additional NGC Content

In [6]:
# For every entry under configdata["ngc_content"]["additional_content"]:
# download it via ngccontent.download, hand it to ngccontent.unzippedfile when
# the entry is flagged "zipped", then pass the local directory to
# ngccontent.upload_data together with the workspace's default datastore.
for content in configdata["ngc_content"]["additional_content"]:
    data_dir, targetfile = ngccontent.download(
        content["url"],
        content["localdirectory"],
        content["filename"],
    )
    if content["zipped"]:
        ngccontent.unzippedfile(data_dir, targetfile)
    ngccontent.upload_data(
        ws,
        ws.get_default_datastore(),
        data_dir,
        content["computedirectory"],
    )

-->> /mnt/batch/tasks/shared/LS_root/mounts/clusters/mrgclaratest/code/NGC/clara/clarasdk.zip file already exists locally <<--

Uploading an estimated of 109 files
Target already exists. Skipping upload for clara/clarasdk.zip
Target already exists. Skipping upload for clara/CovidChestXRayClassifier.ipynb
Target already exists. Skipping upload for clara/Data_Download.ipynb
Target already exists. Skipping upload for clara/download_data.sh
Target already exists. Skipping upload for clara/license.txt
Target already exists. Skipping upload for clara/readMe.md
Target already exists. Skipping upload for clara/TransformInputData.py
Target already exists. Skipping upload for clara/Welcome.ipynb
Target already exists. Skipping upload for clara/MMARs/AutoML/AutoML.ipynb
Target already exists. Skipping upload for clara/MMARs/AutoML/license.txt
Target already exists. Skipping upload for clara/MMARs/AutoML/BYOC/myAutoMLController.py
Target already exists. Skipping upload for clara/MMARs/AutoML/comma

### Create Azure ML Dask cluster

In [7]:
# Start the cluster on the compute target and environment prepared by the
# earlier cells. The experiment name and Jupyter port come from the config
# file; the remaining settings are fixed here.
amlcluster = AzureMLComputeCluster(
    workspace=ws,
    compute_target=ct,
    initial_node_count=1,
    experiment_name=configdata["aml_compute"]["exp_name"],
    environment_definition=env,
    use_gpu=True,
    # NOTE(review): hard-coded to 1 even though the compute-target cell derives
    # `gpus_per_node` from the VM size - confirm this is intentional.
    n_gpus_per_node=1,
    jupyter=True,
    jupyter_port=configdata["aml_compute"]["jupyter_port"],
    dashboard_port=9001,
    scheduler_port=9002,
    scheduler_idle_timeout=1200,
    worker_death_timeout=30,
    additional_ports=[],
    datastores=[],
    telemetry_opt_out=True,
    asynchronous=False,
)

############################## Setting up cluster ##############################
####################### Waiting for compute cluster's IP #######################
...................................................................


########################### Scheduler: 10.1.3.7:8786 ###########################
setsid socat tcp-listen:9000,reuseaddr,fork tcp:10.1.3.7:8888 &
########################### Connections established ############################


In [8]:
# Render a clickable link to the Jupyter endpoint reported by the cluster.
jupyter_anchor_template = """<a href="{link}">Jupyter Link</a>"""
jupyter_anchor = jupyter_anchor_template.format(link=amlcluster.jupyter_link)
display(HTML(jupyter_anchor))