In [33]:
import os
import azureml.core
from azureml.core import (
    Workspace,
    Experiment,
    Dataset,
    Datastore,
    ComputeTarget,
    Environment,
    ScriptRunConfig,
)
from azureml.data import OutputFileDatasetConfig
from azureml.core.compute import AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline
from azureml.core.runconfig import DockerConfiguration

# Report which azureml-core release this kernel has loaded.
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

Azure ML SDK Version:  1.48.0


In [34]:
# Workspace handle shared by the experiment, compute lookup and pipeline cells.
# NOTE(review): presumably resolved from a local config.json — confirm it is
# present wherever this notebook is run.
workspace = Workspace.from_config()

In [35]:

# Experiment under which the pipeline run is submitted.
experiment_name = "trainv1-with-epochs"
exp = Experiment(workspace=workspace, name=experiment_name)

In [36]:
# Output dataset collected from the training container.
# `source` must match the RESULTS_DIR path used inside the Docker image.
output = OutputFileDatasetConfig(
    name="nnUNet",
    source="/output/",
    destination=None,
)


In [37]:
# Create (or reuse) the Azure ML compute cluster that the pipeline steps run on.
# AmlCompute and ComputeTarget come from the import cell at the top of the
# notebook, so they are not re-imported here.
compute_name = "trainv1-with-epochs-compute"
vm_size = "STANDARD_NC6"  # previously noted alternative: STANDARD_D2_V2

# Look the targets up once and reuse the mapping for both the membership test
# and the retrieval, instead of reading the property twice.
existing_targets = workspace.compute_targets

if compute_name in existing_targets:
    compute_target = existing_targets[compute_name]
    print(f'Found existing compute target {compute_name}')
else:
    # min_nodes=0 / max_nodes=1: at most a single node is ever requested.
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=0,
        max_nodes=1,
    )
    compute_target = ComputeTarget.create(workspace, compute_name, provisioning_config)
    # Block until provisioning has finished so later cells can use the target.
    compute_target.wait_for_completion(show_output=True)
    print(f'Compute_target {compute_name} created')

InProgress.
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
Compute_target trainv1-with-epochs-compute created


In [38]:
# Directory holding the step scripts; reused by the training configuration.
script_folder = "src/"

# Preparation step: runs prepare.py from `script_folder` on the compute target.
prep_step = PythonScriptStep(
    script_name="prepare.py",
    name="prepare step",
    source_directory=script_folder,
    compute_target=compute_target,
    arguments=[],  # the script currently takes no arguments
    allow_reuse=True,
)

In [39]:

# Show the compute target's repr as a quick sanity check.
# NOTE(review): the printed repr includes the subscription ID and resource
# group — consider clearing this output before sharing the notebook.
print(compute_target)

AmlCompute(workspace=Workspace.create(name='nnunet', subscription_id='e5fb5c5f-cb5c-42c8-86b6-cf4fd5aac11b', resource_group='azureml-test'), name=trainv1-with-epochs-compute, id=/subscriptions/e5fb5c5f-cb5c-42c8-86b6-cf4fd5aac11b/resourceGroups/azureml-test/providers/Microsoft.MachineLearningServices/workspaces/nnunet/computes/trainv1-with-epochs-compute, type=AmlCompute, provisioning_state=Succeeded, location=eastus2, tags={})


In [40]:
# Environment backed by a prebuilt Docker image; Python packages are taken
# from the image as-is (user-managed dependencies).
docker_env = Environment(name="nnunet-docker")
docker_env.docker.base_image = "nnunet.azurecr.io/training_env:gpu" # example : "fastdotai/fastai2:latest"
docker_env.python.user_managed_dependencies = True
docker_config = DockerConfiguration(use_docker=True, shm_size='32g')

# The ScriptRunConfig is used only as a carrier for the run configuration,
# source directory and script name that the pipeline step consumes below.
# The output dataset is attached through the step's `outputs`, not passed as
# a script argument.
train_cfg = ScriptRunConfig(
    source_directory=script_folder,
    script="trainv1.py",
    compute_target=compute_target,
    environment=docker_env,
    docker_runtime_config=docker_config,
)

# create the training step :
train_step = PythonScriptStep(
    name="training step",
    outputs=[output],
    source_directory=train_cfg.source_directory,
    script_name=train_cfg.script,
    runconfig=train_cfg.run_config,
)

# The two steps share no data dependency, so nothing else orders them.
# Declare explicitly that training starts only after the prepare step.
# NOTE(review): assumes prepare.py must complete before training — confirm.
train_step.run_after(prep_step)

In [41]:

# Assemble the steps into a pipeline and submit it under the experiment.
pipeline_steps = [prep_step, train_step]
pipeline = Pipeline(workspace=workspace, steps=pipeline_steps)
run = exp.submit(pipeline)

Created step prepare step [0657a4e6][9f5968b2-efbc-490c-bccc-638f77a30555], (This step is eligible to reuse a previous run's output)
Created step training step [463a66a3][9c3af33a-864f-4cbf-9afc-19bba9f522c9], (This step is eligible to reuse a previous run's output)
Submitted PipelineRun 9260493a-d890-481e-8aa8-60feae146611
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/9260493a-d890-481e-8aa8-60feae146611?wsid=/subscriptions/e5fb5c5f-cb5c-42c8-86b6-cf4fd5aac11b/resourcegroups/azureml-test/workspaces/nnunet&tid=610ba57f-df8b-4dc2-8ddf-2532d017cd71
