# Setup configurations

In [14]:
# Notebook-wide settings.
# Name under which the ground-truth dataset is registered and looked up.
gt_input_dataset_name = "bfp_detection_ground_truth"
# Both path settings currently point at the same "data" folder.
default_data_path = datastore_path = "data"

## Connect to AzureML workspace

In [15]:
from azureml.core import Workspace

# Load the workspace via Workspace.from_config() and report which one was picked up.
ws = Workspace.from_config()
print("Workspace", ws.name, "loaded")
print("Subscription ID", ws.subscription_id)


Workspace poi_machine_learning_workspace loaded
Subscprtion ID 2888fde7-9a5c-48fc-8623-84f525de174c


In [16]:
import os

from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget

# The cluster name can be overridden through AML_COMPUTE_CLUSTER_NAME.
compute_name = os.environ.get('AML_COMPUTE_CLUSTER_NAME', 'BFP-training-v100')

if compute_name not in ws.compute_targets:
    # Nothing registered under that name: only report it, compute_target stays unset here.
    print("sorry no compute found !")
else:
    compute_target = ws.compute_targets[compute_name]
    cluster_state = compute_target.get_status().state
    print("Compute available Status {}".format(cluster_state))

Compute available Status Stopped


## Prepare environment

In [17]:
from azureml.core import Environment

env_name = 'poi-bfp-detection_efficientnetb7_training'

# Build the environment from the conda specification file named after it.
data_preparation_env = Environment.from_conda_specification(
    env_name,
    f'./../requirements/{env_name}.yaml',
)

# Docker settings: no base image is set; a Dockerfile path is supplied instead.
data_preparation_env.docker.shared_volumes = False
data_preparation_env.docker.base_image = None
data_preparation_env.docker.base_dockerfile = "../requirements/Dockerfile"

# Register the definition, then read it back from the workspace so the
# name and version reported below are the registered ones.
data_preparation_env.register(workspace=ws)
data_preparation_env = Environment.get(ws, env_name)
print('Environment', data_preparation_env.name, 'registered. Version', data_preparation_env.version)

No Python version provided, defaulting to "3.8.13"
'shared_volumes' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object instead.


Environment poi-bfp-detection_efficientnetb7_training registered. Version 2


## Define run configuration

In [18]:
from azureml.core.runconfig import RunConfiguration, DockerConfiguration

# Run the step inside Docker. shared_volumes is set here because configuring it
# through Environment.docker is deprecated in favour of DockerConfiguration.
docker_configuration = DockerConfiguration(use_docker=True, shared_volumes=False)

# Run configuration shared by the pipeline steps: Docker settings + registered environment.
monitoring_run_config = RunConfiguration()
monitoring_run_config.docker = docker_configuration
monitoring_run_config.environment = data_preparation_env

## Prepare compute target

In [19]:
import os

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Honour the same AML_COMPUTE_CLUSTER_NAME override as the compute availability
# check, falling back to the default training cluster name.
compute_target_name = os.environ.get('AML_COMPUTE_CLUSTER_NAME', 'BFP-training-v100')

try:
    compute_target = AmlCompute(ws, compute_target_name)
except ComputeTargetException:
    # Add context before propagating; the exception type is left unchanged.
    print(f'Could not load compute target {compute_target_name} from workspace {ws.name}.')
    raise
print(f'Existing compute target {compute_target_name} found.')

Existing compute target BFP-training-v100 found.


## Register experiment

In [20]:
from azureml.core import Experiment

# Experiment under which the pipeline run is submitted.
experiment_name = 'expr-bfp-detection_efficientnetb7_training'
experiment = Experiment(
    workspace=ws,
    name=experiment_name,
)

## Create and register Dataset

In [11]:
import os
from getpass import getpass

from azureml.core import Datastore, Dataset

# SECURITY: never store the SAS token in the notebook. It is read from the
# BFP_DETECTION_SAS_TOKEN environment variable, with an interactive prompt as
# fallback. Any token that was ever committed in this cell should be treated as
# leaked and rotated.
blob_sas_token = os.environ.get('BFP_DETECTION_SAS_TOKEN') or getpass(
    'SAS token for blob container bfp-detection: '
)

# Register the blob container as a datastore named 'poimachinelearning'.
datastores = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name='poimachinelearning',
    container_name='bfp-detection',
    account_name='poimachinelearning',
    sas_token=blob_sas_token,
)

In [12]:
# Use the datastore registered as 'poimachinelearning'. The workspace default
# datastore is not needed, so it is no longer fetched first.
datastore = ws.datastores['poimachinelearning']
datastore_paths = [(datastore, datastore_path)]

# Build a file dataset over that path and register it as a new version.
# The register() call stays the last expression so the cell displays its result.
input_dataset = Dataset.File.from_files(path=datastore_paths)
input_dataset.register(
    workspace=ws,
    name=gt_input_dataset_name,
    description='Ground truth data for building footprint detection',
    create_new_version=True,
)


{
  "source": [
    "('poimachinelearning', 'data')"
  ],
  "definition": [
    "GetDatastoreFiles"
  ],
  "registration": {
    "id": "6da6521d-44c3-4b74-950d-977585a66569",
    "name": "bfp_detection_ground_truth",
    "version": 2,
    "description": "Ground truth data for building footprint detection",
    "workspace": "Workspace.create(name='poi_machine_learning_workspace', subscription_id='2888fde7-9a5c-48fc-8623-84f525de174c', resource_group='poi_datalake')"
  }
}

## Get Raw Data Files 

In [17]:
from azureml.core import Workspace, Dataset

# Reuse the workspace loaded at the top of the notebook instead of repeating its
# subscription / resource group / name as literals that can drift from the config.
workspace = ws
subscription_id = workspace.subscription_id
resource_group = workspace.resource_group
workspace_name = workspace.name

# Look the dataset up by the same name it was registered under.
dataset = Dataset.get_by_name(workspace, name=gt_input_dataset_name)
print(dataset)
# Uncomment to download the files next to the notebook:
# dataset.download(target_path='.', overwrite=False)

FileDataset
{
  "source": [
    "('poimachinelearning', 'data')"
  ],
  "definition": [
    "GetDatastoreFiles"
  ],
  "registration": {
    "id": "6da6521d-44c3-4b74-950d-977585a66569",
    "name": "bfp_detection_ground_truth",
    "version": 2,
    "description": "Ground truth data for building footprint detection",
    "workspace": "Workspace.create(name='poi_machine_learning_workspace', subscription_id='2888fde7-9a5c-48fc-8623-84f525de174c', resource_group='poi_datalake')"
  }
}


## Building the pipeline

### Define pipeline parameters

In [21]:
from azureml.pipeline.core.graph import PipelineParameter

# Data parameters
input_dataset = PipelineParameter(name="path", default_value=gt_input_dataset_name)
input_size = PipelineParameter(name="imgsize", default_value=512)
classes = PipelineParameter(name='classes', default_value=1)

# Model / training parameters
epochs = PipelineParameter(name="epochs", default_value=50)
loss = PipelineParameter(name="loss", default_value="jaccard_distance")
metrics = PipelineParameter(name='metrics', default_value='all')
backbone = PipelineParameter(name='backbone', default_value='efficientnetb7')
batch_size = PipelineParameter(name='batch_size', default_value=1)

# Output location parameter
out_path = PipelineParameter(name='out_path', default_value='runs')


## Define pipeline steps

In [22]:
from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.steps import PythonScriptStep

# `input_dataset` is also used earlier in the notebook for the registered
# dataset object; make sure the PipelineParameter definition is the one in scope.
if not isinstance(input_dataset, PipelineParameter):
    raise TypeError(
        "input_dataset must be the 'path' PipelineParameter; "
        "re-run the 'Define pipeline parameters' cell before this one."
    )

# Command-line arguments for main.py. Every value is a PipelineParameter so it
# can be overridden at submission time; "--path" now uses the `path` parameter
# (default: gt_input_dataset_name) instead of a fixed string.
bfp_detection_script_args = [
    "--path", input_dataset,
    "--imgsize", input_size,
    "--classes", classes,
    "--epochs", epochs,
    "--loss", loss,
    "--metrics", metrics,
    "--backbone", backbone,
    "--batch_size", batch_size,
    "--out_path", out_path,
]

# Single training step: runs ../scripts/main.py on the compute target with the
# shared run configuration.
bfp_detection_training_step = PythonScriptStep(
    name='POI Building footprint detection',
    script_name='main.py',
    arguments=bfp_detection_script_args,
    compute_target=compute_target,
    runconfig=monitoring_run_config,
    source_directory='../scripts/',
)

## Create pipeline

In [23]:
from azureml.pipeline.core import Pipeline

# The pipeline consists of the single training step.
pipeline_steps = [bfp_detection_training_step]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
print("Pipeline created")

Pipeline created


Performing interactive authentication. Please follow the instructions on the terminal.


### Submit pipeline experiment

In [13]:
from azureml.widgets import RunDetails

# Submit the pipeline under the experiment, then show the run-details widget.
pipeline_run = experiment.submit(pipeline)
run_widget = RunDetails(pipeline_run)
run_widget.show()

Created step POI Building footprint detection [71878fea][eea78a0c-5660-42a7-a8d1-54beaaaa7fd1], (This step will run and generate new outputs)
Submitted PipelineRun 8571bc95-524e-46cf-bf48-08bfeda2aed0
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/8571bc95-524e-46cf-bf48-08bfeda2aed0?wsid=/subscriptions/2888fde7-9a5c-48fc-8623-84f525de174c/resourcegroups/poi_datalake/workspaces/poi_machine_learning_workspace&tid=374f8026-7b54-4a3a-b87d-328fa26ec10d


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

### Validate pipeline

In [16]:
# validate() returns the list of problems it found; inspect it rather than
# reporting success unconditionally.
validation_errors = pipeline.validate()
if validation_errors:
    for validation_error in validation_errors:
        print("Pipeline validation error:", validation_error)
    raise ValueError(
        f"Pipeline validation failed with {len(validation_errors)} error(s)"
    )
print("Pipeline validation completed")

Pipeline validation completed
