In [1]:
from azureml.core import Experiment
from azureml.core import Workspace, Run
from azureml.core import Environment
from azureml.core import Dataset, Datastore

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import ScriptRunConfig

In [2]:
from azureml.core.compute import ComputeInstance

In [3]:
# Connect to the Azure ML workspace. from_config() is called without arguments,
# so the workspace identity comes from the SDK's config lookup, not from this notebook.
workspace = Workspace.from_config()

In [4]:
# Handle to the compute instance named 'kaggle-gpu' in this workspace; it is
# passed as the compute target of the training run further down.
instance = ComputeInstance(workspace=workspace, name='kaggle-gpu')

In [5]:
# Fetch the dataset registered in the workspace under the name 'recursionbio_zip'.
dataset = Dataset.get_by_name(workspace, name='recursionbio_zip')

In [6]:
# Display the dataset's repr (source, definition and registration details).
dataset

{
  "source": [
    "('codefilestore', '/Users/kjaanson/recursionpharma/input/recursion-cellular-image-classification.zip')"
  ],
  "definition": [
    "GetDatastoreFiles"
  ],
  "registration": {
    "id": "1e762845-e940-4179-b7b9-56313a8bb060",
    "name": "recursionbio_zip",
    "version": 5,
    "workspace": "Workspace.create(name='bioworks-azureml', subscription_id='73f2271c-beee-4233-b5dd-202b520bf0db', resource_group='mslearn-azureml')"
  }
}

In [7]:
# Retrieve the environments visible in the workspace; the result is inspected
# by key in the next cell.
env_list = Environment.list(workspace)

In [8]:
# Show the names of the available environments.
env_list.keys()

dict_keys(['AzureML-PyTorch-1.3-GPU', 'AzureML-TensorFlow-2.0-CPU', 'AzureML-Tutorial', 'AzureML-PyTorch-1.3-CPU', 'AzureML-TensorFlow-2.0-GPU', 'AzureML-Chainer-5.1.0-GPU', 'AzureML-Minimal', 'AzureML-PyTorch-1.2-CPU', 'AzureML-TensorFlow-1.12-CPU', 'AzureML-TensorFlow-1.13-CPU', 'AzureML-PyTorch-1.1-CPU', 'AzureML-TensorFlow-1.10-CPU', 'AzureML-PyTorch-1.0-GPU', 'AzureML-TensorFlow-1.12-GPU', 'AzureML-TensorFlow-1.13-GPU', 'AzureML-Chainer-5.1.0-CPU', 'AzureML-PyTorch-1.0-CPU', 'AzureML-Scikit-learn-0.20.3', 'AzureML-PyTorch-1.2-GPU', 'AzureML-PyTorch-1.1-GPU', 'AzureML-TensorFlow-1.10-GPU', 'AzureML-PySpark-MmlSpark-0.15', 'AzureML-AutoML', 'AzureML-PyTorch-1.4-GPU', 'AzureML-PyTorch-1.4-CPU', 'AzureML-VowpalWabbit-8.8.0', 'AzureML-Hyperdrive-ForecastDNN', 'AzureML-AutoML-GPU', 'AzureML-AutoML-DNN', 'AzureML-PyTorch-1.5-CPU', 'AzureML-PyTorch-1.5-GPU', 'AzureML-Sidecar', 'AzureML-Designer-Score', 'AzureML-TensorFlow-2.1-GPU', 'AzureML-TensorFlow-2.1-CPU', 'AzureML-AutoML-DNN-Vision-

In [9]:
# Take the 'AzureML-TensorFlow-2.3-GPU' environment as the base and continue
# with a clone that carries a project-specific name.
tf_env = Environment.get(
    workspace=workspace,
    name='AzureML-TensorFlow-2.3-GPU',
).clone(new_name='recbio-tf-2.3-efficientnet')

In [10]:
# Extra conda packages for the training environment, added in this order.
for conda_pkg in ('scikit-learn', 'scipy', 'matplotlib'):
    tf_env.python.conda_dependencies.add_conda_package(conda_pkg)

In [11]:
# Extra pip packages for the training environment (horovod is version-pinned).
for pip_pkg in ('horovod==0.19.5', 'retry'):
    tf_env.python.conda_dependencies.add_pip_package(pip_pkg)

In [12]:
# Display the full environment definition after the package additions above.
tf_env

{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04:20201112.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "recbio-tf-2.3-efficientnet",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "conda-forge"

In [13]:
# Training job definition: run train.py from ./scripts on the compute instance
# using tf_env, with dataset.as_mount() supplied as the value of --data-file.
script_args = ['--data-file', dataset.as_mount()]
train_scr = ScriptRunConfig(
    source_directory='./scripts',
    script='train.py',
    arguments=script_args,
    compute_target=instance,
    environment=tf_env,
)

In [14]:
# Display the run configuration object as a quick check that it was created.
train_scr

<azureml.core.script_run_config.ScriptRunConfig at 0x7fdbb4203ac8>

In [15]:
# Submit the training configuration under the 'RecBio-TF-Test' experiment.
experiment = Experiment(workspace=workspace, name='RecBio-TF-Test')
run = experiment.submit(train_scr)

In [None]:
# Block until the submitted run finishes, streaming its logs into the cell output.
run.wait_for_completion(show_output=True)

RunId: RecBio-TF-Test_1614086353_b28bdf1d
Web View: https://ml.azure.com/experiments/RecBio-TF-Test/runs/RecBio-TF-Test_1614086353_b28bdf1d?wsid=/subscriptions/73f2271c-beee-4233-b5dd-202b520bf0db/resourcegroups/mslearn-azureml/workspaces/bioworks-azureml

Streaming azureml-logs/20_image_build_log.txt

2021/02/23 13:19:19 Downloading source code...
2021/02/23 13:19:20 Finished downloading source code
2021/02/23 13:19:21 Creating Docker network: acb_default_network, driver: 'bridge'
2021/02/23 13:19:21 Successfully set up Docker network: acb_default_network
2021/02/23 13:19:21 Setting up Docker configuration...
2021/02/23 13:19:22 Successfully set up Docker configuration
2021/02/23 13:19:22 Logging in to registry: 2e7eaafe1ee440358c1d803ca534b890.azurecr.io
2021/02/23 13:19:23 Successfully logged into 2e7eaafe1ee440358c1d803ca534b890.azurecr.io
2021/02/23 13:19:23 Executing step ID: acb_step_0. Timeout(sec): 5400, Working directory: '', Network: 'acb_default_network'
2021/02/23 13:19:23