# Prerequisites - INSTALL SDK + configure .env
- Install Azure ML SDK v1.55 - recommended option is A) 
    - Option A) via ESML conda: Use conda/kernel in ESML: `azure_automl_esml_v155`, or kernel: `azure_automl_esml_v155_v155` (supports both SDK v1.55 and SDK v2 v 1.15)
        - How to install: https://github.com/jostrm/azure-enterprise-scale-ml/blob/main/environment_setup/user_dev_env_install/01-install-azureml-sdk-v1+v2.md
    - Option B) via Microsoft docs. Then you also need to install python-dotenv: `%pip install -U python-dotenv`
        - https://learn.microsoft.com/en-us/python/api/overview/azure/ml/install?view=azure-ml-py
- Create a .env file, with the 3 variables: 
    - AZURE_CLIENT_ID - value from the project-specific Key Vault, secret: `esml-project001-sp-id`
    - AZURE_CLIENT_SECRET - value from the project-specific Key Vault, secret: `esml-project001-sp-secret`
    - AZURE_TENANT_ID - value from the project-specific Key Vault, secret: `esml-tenant-id`


In [None]:
from azureml.core.compute import ComputeTarget
from azureml.core.compute import AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.authentication import ServicePrincipalAuthentication
from azureml.core import Workspace
from azureml.core import Experiment

In [None]:
# TODO: USER MANDATORY - every user must set the values in this section
user_objectid_to_own_computeinstance = 'guid-object-id-of-ad-user'  # object id of the AD user who will own the compute instance
sku_array = [
    "Standard_DS11_v2",
    "STANDARD_NC24RS_V3",
    "Standard_ND96amsr_A100_v4",
    "Standard_D13_v2",
]
compute_instance_sku = sku_array[0]  # pick the VM size by index into sku_array
compute_instance_suffix = "03"  # max two characters; keeps your compute name unique within the workspace/project
project_number = "2"  # YOUR esml project number
# TODO: END - USER MANDATORY


# TODO: CONFIGURE ONCE - set by the AIFactory core team
subscription_id = "guid"
region_short = "uks"
aifactory_environment = "dev"  # one of: dev, test, prod
aifactory_prefix = "dc-heroes"
# TODO: END - CONFIGURE ONCE

In [None]:
# NO TOUCH: everything below is derived from the configuration cell by naming convention.
ai_factory_index = "001"

# Shared "<region>-<environment>-<index>" tail used by every resource name, e.g. "uks-dev-001".
_env_suffix = "-".join([region_short, aifactory_environment, ai_factory_index])

resource_group_name = aifactory_prefix + "-esml-project00" + project_number + "-" + _env_suffix + "-rg"
resource_group_name_common = aifactory_prefix + "-esml-common-" + _env_suffix
workspace_name = "aml-prj00" + project_number + "-" + _env_suffix
vnet_name = "vnt-esmlcmn-" + _env_suffix  # vnt-esmlcmn-uks-dev-001

subnet_name = "snet-esml-cmn-001"
# Azure resource IDs start with "/subscriptions/<subscription id>/..."; the original omitted
# the "subscriptions" segment, which produced an invalid subnet resource ID.
# Despite its name, vnetId ends in "/subnets/" so the subnet name can be appended directly.
vnetId = (
    "/subscriptions/" + subscription_id
    + "/resourceGroups/" + resource_group_name_common
    + "/providers/Microsoft.Network/virtualNetworks/" + vnet_name
    + "/subnets/"
)
subnet_fully_qualified = vnetId + subnet_name

# Name and folder of the cached workspace config file (one file per environment).
ws_config_name = aifactory_environment + "_ws_config.json"
ws_config_path = "../.azureml/"

In [None]:
import os
from dotenv import load_dotenv

# Pull the variables from the .env file into the process environment,
# then read the tenant id from there.
print("Loading environment variables from .env file - Logging in via CMN SP")
load_dotenv()
tenant_id = os.environ.get('AZURE_TENANT_ID')

In [None]:
# Show the convention-derived names (one per line) so they can be checked before logging in.
print(
    resource_group_name,
    resource_group_name_common,
    workspace_name,
    vnet_name,
    subnet_fully_qualified,
    tenant_id,
    sep="\n",
)

In [None]:
# Start without a workspace handle: the service-principal login cell checks `ws is None`
# to choose between credentials from the .env file and secrets from the workspace Key Vault.
ws = None

# LOGIN - Alternative B - via Service principal 
- The service principal possibly has more elevated access than your own user

In [None]:
import os
from dotenv import load_dotenv

# Log in to the project workspace with a service principal and cache the workspace config.
# - No workspace yet (ws is None): credentials come from the .env file.
# - Workspace already available: credentials are read from its default Key Vault.
if ws is None:
    print("Loading environment variables from .env file - Logging in via CMN SP")
    load_dotenv()

    tenant_id = os.getenv('AZURE_TENANT_ID')
    sp_app_id = os.getenv('AZURE_CLIENT_ID')
    sp_secret = os.getenv('AZURE_CLIENT_SECRET')

    # Fail early with a clear message instead of passing None into the authentication object.
    _missing_env = [
        var_name
        for var_name, var_value in (
            ('AZURE_TENANT_ID', tenant_id),
            ('AZURE_CLIENT_ID', sp_app_id),
            ('AZURE_CLIENT_SECRET', sp_secret),
        )
        if not var_value
    ]
    if _missing_env:
        raise ValueError(
            "Missing environment variable(s): " + ", ".join(_missing_env)
            + ". Add them to your .env file (see the prerequisites at the top of this notebook)."
        )

    sp = ServicePrincipalAuthentication(
        tenant_id=tenant_id,
        service_principal_id=sp_app_id,
        service_principal_password=sp_secret)
else:
    kv = ws.get_default_keyvault()
    # NOTE(review): these secret names differ from the "esml-project001-sp-*" names listed in
    # the prerequisites at the top of the notebook - confirm which naming is correct.
    sp = ServicePrincipalAuthentication(tenant_id=kv.get_secret(name="esml-tenant-id"),
                                        service_principal_id=kv.get_secret(name="esml-project-sp-id"),
                                        service_principal_password=kv.get_secret(name="esml-project-sp-secret"))

ws = Workspace.get(
    name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group_name,
    auth=sp)

# Cache the workspace details so later sessions can use Workspace.from_config.
ws.write_config(path="../", file_name=ws_config_name)


## Cached LOGIN: 2nd time and later, with INTERACTIVE Login as fallback

import os
from azureml.core.authentication import InteractiveLoginAuthentication
from dotenv import load_dotenv

# Re-use the workspace config cached by an earlier login; if it cannot be loaded,
# fall back to an interactive login and write the config for next time.
try:
    # Workspace.from_config takes the file name as `_file_name` (write_config uses `file_name`).
    # Passing `file_name=` raised a TypeError that the former bare `except:` swallowed,
    # so the cached config was never actually used.
    ws = Workspace.from_config(path="../", _file_name=ws_config_name)
except Exception as config_error:
    # Any failure to load the cached config triggers the fallback, but the reason is shown.
    print("Could not load cached workspace config: {}".format(config_error))
    print("Loading environment variables from .env file - Logging in via interactive login")
    load_dotenv()
    tenant_id = os.getenv('AZURE_TENANT_ID')

    auth = InteractiveLoginAuthentication(force=False, tenant_id=tenant_id)

    ws = Workspace.get(
        name=workspace_name,
        subscription_id=subscription_id,
        resource_group=resource_group_name,
        auth=auth)

    ws.write_config(path="../", file_name=ws_config_name)

# Create - Compute Instance, in vNet
- Creates the compute instance if it does not exist; otherwise starts the existing instance

In [None]:
from azureml.core.compute import ComputeTarget
from azureml.core.compute import ComputeInstance
from azureml.core.compute_target import ComputeTargetException

# Start the project's compute instance if it already exists; otherwise create it
# (inside the common vNet when `subnet_name` is set) and wait for provisioning to finish.
# The name uses `project_number` and `compute_instance_suffix` from the user configuration
# cell; the suffix is no longer overridden here with a hard-coded value.
name = "p00"+project_number+"-m01-pggpu-dev-ci"+compute_instance_suffix # + datetime.datetime.now().strftime("%Y%m%d%H%M")
try:
    cpu_cluster = ComputeInstance(workspace=ws, name=name)
    print('Found existing cluster {} for project and environment, using it.'.format(name))
    # Request the start without blocking the notebook until the instance is running.
    cpu_cluster.start(wait_for_completion=False, show_output=True)
except ComputeTargetException as e:
    if "ComputeTargetNotFound" in str(e):
        print('Creating new cluster - ' + name)

        # Settings common to both the vNet and the non-vNet case.
        instance_settings = dict(
            vm_size=compute_instance_sku,
            ssh_public_access=False,
            assigned_user_object_id=user_objectid_to_own_computeinstance,
            assigned_user_tenant_id=tenant_id,
        )
        # Only join the vNet when a subnet has been configured.
        if subnet_name:
            instance_settings.update(
                vnet_resourcegroup_name=resource_group_name_common,
                vnet_name=vnet_name,
                subnet_name=subnet_name,
            )
        compute_config = ComputeInstance.provisioning_configuration(**instance_settings)

        cpu_cluster = ComputeTarget.create(ws, name, compute_config)

        # Block until the new compute instance has finished provisioning.
        cpu_cluster.wait_for_completion(show_output=True)
    else:
        # Any other compute error is reported but not re-raised.
        print("En error occured when trying to start the compute. Please see error")
        print(e)


In [None]:
#cpu_cluster.stop()

# Create AML cluster, in vNet
- Creates the cluster if it does not exist; otherwise reuses the existing cluster

#Compute Train & batch scoring
from azureml.core.compute import ComputeTarget
from azureml.core.compute import AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Look up the project's AML compute cluster; create it (inside the common vNet when
# `subnet_name` is set) if it does not exist, then wait until it is ready.
name = "p00"+project_number+"-m01"+region_short+"-"+aifactory_environment # "p001-m01uks-dev"  + datetime.datetime.now().strftime("%Y%m%d%H%M")
try:
    cpu_cluster = AmlCompute(workspace=ws, name=name)
    print('Found existing cluster {} for project and environment, using it.'.format(name))
except ComputeTargetException as e:
    if "ComputeTargetNotFound" not in str(e):
        print("En error occured when trying to start the compute. Please see error")
        print(e)
        # Re-raise: continuing would call wait_for_completion on an undefined or stale
        # `cpu_cluster` (e.g. the compute instance from the previous cell) and hide this error.
        raise

    print('Creating new cluster - ' + name)

    # Settings common to both the vNet and the non-vNet case.
    cluster_settings = dict(
        vm_size=compute_instance_sku,
        vm_priority='dedicated',  # 'dedicated', 'lowpriority'
        min_nodes=0,
        max_nodes=3,
    )
    # Only join the vNet when a subnet has been configured.
    if subnet_name:
        cluster_settings.update(
            vnet_resourcegroup_name=resource_group_name_common,
            vnet_name=vnet_name,
            subnet_name=subnet_name,
        )
    compute_config = AmlCompute.provisioning_configuration(**cluster_settings)

    cpu_cluster = ComputeTarget.create(ws, name, compute_config)

# Can poll for a minimum number of nodes and for a specific timeout.
# If min_node_count=None is provided, it will use the scale settings for the cluster instead
cpu_cluster.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=30)