In [1]:
from azureml.core import Experiment
from azureml.core import Workspace, Run
from azureml.core import Environment
from azureml.core import Dataset, Datastore

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import ScriptRunConfig

In [2]:
from azureml.core.compute import ComputeInstance

In [3]:
# Workspace handle shared by the SDK calls in the cells below
# (compute target, dataset, environment and experiment lookups all receive it).
workspace = Workspace.from_config()

In [4]:
# Resolve the compute target named 'gpu-compute-low' in this workspace;
# it is later handed to ScriptRunConfig as the place the training script runs.
instance = ComputeTarget(
    name='gpu-compute-low',
    workspace=workspace,
)

In [5]:
# Fetch the registered dataset called 'recursionbio' from the workspace.
dataset = Dataset.get_by_name(
    workspace=workspace,
    name='recursionbio',
)

In [6]:
# Show the dataset's repr (source, definition, registration) as the cell output.
dataset

{
  "source": [
    "('codefilestore', '/Users/kjaanson/recursionpharma/input/recbio/**')"
  ],
  "definition": [
    "GetDatastoreFiles"
  ],
  "registration": {
    "id": "ee33756c-ab30-45ec-adf9-5b93629bbbd8",
    "name": "recursionbio",
    "version": 2,
    "workspace": "Workspace.create(name='bioworks-azureml', subscription_id='73f2271c-beee-4233-b5dd-202b520bf0db', resource_group='mslearn-azureml')"
  }
}

In [7]:
# Collect the environments known to the workspace; the next cell inspects the
# mapping's keys to pick a base environment by name.
env_list = Environment.list(workspace=workspace)

In [8]:
# Display only the environment names (the mapping's keys), not the full objects.
env_list.keys()

dict_keys(['AzureML-VowpalWabbit-8.8.0', 'AzureML-PyTorch-1.3-CPU', 'AzureML-Minimal', 'AzureML-Tutorial', 'AzureML-PyTorch-1.5-GPU', 'AzureML-TensorFlow-2.2-CPU', 'AzureML-PyTorch-1.6-CPU', 'AzureML-PyTorch-1.5-CPU', 'AzureML-TensorFlow-2.2-GPU', 'AzureML-PyTorch-1.6-GPU', 'AzureML-TensorFlow-2.3-CPU', 'AzureML-Triton', 'AzureML-TensorFlow-2.3-GPU', 'AzureML-DeepSpeed-0.3-GPU', 'AzureML-Pytorch1.7-Cuda11-OpenMpi4.1.0-py36', 'AzureML-Scikit-learn0.24-Cuda11-OpenMpi4.1.0-py36', 'AzureML-TensorFlow2.4-Cuda11-OpenMpi4.1.0-py36', 'AzureML-TensorFlow-1.15-Inference-CPU', 'AzureML-XGBoost-0.9-Inference-CPU', 'AzureML-PyTorch-1.6-Inference-CPU', 'AzureML-Minimal-Inference-CPU'])

In [9]:
# Start from the 'AzureML-TensorFlow-2.3-GPU' environment and work on a clone
# under our own name, so the packages added in later cells go onto the copy.
tf_env = Environment.get(
    workspace=workspace,
    name='AzureML-TensorFlow-2.3-GPU',
).clone(new_name='recbio-tf-2.3-efficientnet')

In [10]:
# Conda packages added on top of the cloned environment, in the original order.
for conda_pkg in ('scikit-learn', 'scipy', 'matplotlib'):
    tf_env.python.conda_dependencies.add_conda_package(conda_pkg)

In [11]:
# Pip packages added on top of the cloned environment; horovod is pinned to
# 0.19.5 while retry is left unpinned, exactly as specified here.
for pip_pkg in ('horovod==0.19.5', 'retry'):
    tf_env.python.conda_dependencies.add_pip_package(pip_pkg)

In [13]:
# Command-line arguments for train_cnn.py: the dataset (via dataset.as_mount())
# as --data-path, and 10 as --epochs.
train_args = ['--data-path', dataset.as_mount(), '--epochs', 10]

# Job description: run ./scripts/train_cnn.py on the `instance` compute target
# inside the customised `tf_env` environment.
train_scr = ScriptRunConfig(
    source_directory='./scripts',
    script='train_cnn.py',
    arguments=train_args,
    environment=tf_env,
    compute_target=instance,
)

In [14]:
# Echo the run configuration object as the cell output.
train_scr

<azureml.core.script_run_config.ScriptRunConfig at 0x7fa52686a400>

In [15]:
# Submit the training configuration under the 'recbio-cnn-model' experiment and
# keep the returned run handle for monitoring in the next cell.
run = Experiment(
    name='recbio-cnn-model',
    workspace=workspace,
).submit(config=train_scr)

In [16]:
# Wait on the submitted run; show_output=True streams its logs (image build,
# etc.) into this cell's output, so expect a long log dump below.
run.wait_for_completion(show_output=True)

RunId: recbio-cnn-model_1619352833_c96cf3ea
Web View: https://ml.azure.com/experiments/recbio-cnn-model/runs/recbio-cnn-model_1619352833_c96cf3ea?wsid=/subscriptions/73f2271c-beee-4233-b5dd-202b520bf0db/resourcegroups/mslearn-azureml/workspaces/bioworks-azureml

Streaming azureml-logs/20_image_build_log.txt

2021/04/25 12:14:04 Downloading source code...
2021/04/25 12:14:06 Finished downloading source code
2021/04/25 12:14:06 Creating Docker network: acb_default_network, driver: 'bridge'
2021/04/25 12:14:06 Successfully set up Docker network: acb_default_network
2021/04/25 12:14:06 Setting up Docker configuration...
2021/04/25 12:14:07 Successfully set up Docker configuration
2021/04/25 12:14:07 Logging in to registry: 2e7eaafe1ee440358c1d803ca534b890.azurecr.io
2021/04/25 12:14:08 Successfully logged into 2e7eaafe1ee440358c1d803ca534b890.azurecr.io
2021/04/25 12:14:08 Executing step ID: acb_step_0. Timeout(sec): 5400, Working directory: '', Network: 'acb_default_network'
2021/04/25 12