# AML Template - sample run on remote compute

## Init & Login

In [None]:
## Check core SDK version number
import azureml.core
import mlflow
import os

from azureml.core import (Datastore, Dataset, Environment, Experiment, ScriptRunConfig,
                          Workspace)
from azureml.core.authentication import InteractiveLoginAuthentication
from IPython.display import display

# Report the installed Azure ML SDK version; handy when an API mismatch shows up.
print("[INFO] SDK version:", azureml.core.VERSION)

# Resolve the workspace from the saved configuration and echo its coordinates.
ws = Workspace.from_config()
print("[SUCCESS] LOGGED IN: ",ws.name, ws.resource_group, ws.location, ws.subscription_id, sep=' @ ')

## set mlflow backend to AML
mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())

# Verify the wiring by URI scheme instead of by host name: the host part of the
# workspace tracking URI is service-defined and has changed over time, so a
# check for one specific host fragment reports False on a correctly wired
# workspace. The "azureml://" scheme is what identifies the AML tracking store.
print("[INFO] MLFlow wired to AML:", mlflow.get_tracking_uri().startswith("azureml://"))

# --- Run settings shared by the cells below ---
aml_compute = "aml-cluster" # name of the AML Compute cluster where the actual training happens
aml_ds = "aml_data" # name of the Datastore (not referenced by the cells visible here)
aml_dset = "diabetes_multiple" # name of the registered Dataset to train on
aml_experiment = "mlflow-azureml" # name of the Experiment, for tracking purposes
project_folder = "code/train/diabetes" # where the code to run lives on disk
mlflow.set_experiment(aml_experiment) # make MLflow log under the same experiment name
experiment = Experiment(ws, aml_experiment) # experiment handle, looked up by name

## Training on AML Compute

To train on an AML remote compute target, you need to specify three configuration steps:

1. define Compute Target
1. define Environment
1. define Script Run

And lastly run the experiment!

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Look up the compute target by name in the workspace.
compute_target = ComputeTarget(workspace=ws, name=aml_compute)

# Serialize the 'status' property so its fields can be read by key.
cts = compute_target.status.serialize()
node_count = cts["currentNodeCount"]

# Any node currently allocated counts as "running"; otherwise report "idle".
cluster_state = "cluster is running" if int(node_count) > 0 else "cluster is idle"

print(
    f'Found existing compute target: {aml_compute}\n'
    f'({cluster_state}) currentNodeCount: {node_count}, '
    f'vmPriority: {cts["vmPriority"]}, vmSize: {cts["vmSize"]}'
)

In [None]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# pip packages to install into the training environment
pip_requirements = [
    'azureml-sdk',
    'azureml-dataprep[pandas,fuse]',
    'scikit-learn==0.22.2.post1',
    'azureml-mlflow',
    'lightgbm',
    'joblib',
]

# Named environment whose conda dependencies are built from the pip list above.
conda_env = Environment('mlflow-azureml-diabetes')
conda_deps = CondaDependencies.create(pip_packages=pip_requirements)
conda_env.python.conda_dependencies = conda_deps

# Keep a copy of the dependency spec next to the training code ...
env_file = os.path.join(project_folder, 'env.yml')
conda_env.python.conda_dependencies.save(env_file)

# ... and register the environment with the workspace.
conda_env.register(ws)


In [None]:
from azureml.core import ScriptRunConfig

# Fetch the dataset by name and hand it to the script as the named input 'data'.
wtds = Dataset.get_by_name(ws, name=aml_dset)
script_args = [wtds.as_named_input('data')]

# Describe which script to run and from which folder.
src = ScriptRunConfig(
    source_directory=project_folder,
    script='train.py',
    arguments=script_args,
)

# Fill in the run configuration: framework, environment and compute target name.
run_settings = src.run_config
run_settings.framework = 'python'
run_settings.environment = conda_env
run_settings.target = compute_target.name

In [None]:
# Submit the script run configuration to the experiment and keep the run handle.
run = experiment.submit(src)
# Bare expression so the notebook shows the run as the cell's output.
run

In [None]:
from azureml.widgets import RunDetails

# Build the details widget for the submitted run, then display it.
details_widget = RunDetails(run)
details_widget.show()