In [1]:
import os
import azureml.core
print("SDK version:", azureml.core.VERSION)

from azureml.core import Datastore, Experiment, ScriptRunConfig, Workspace
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE, RunConfiguration
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep
from azureml.train.estimator import Estimator
from azureml.widgets import RunDetails
from pathlib import Path



SDK version: 1.0.39


In [2]:
ws = Workspace.from_config()
print('Name: {0}'.format(ws.name), 'Resource Group: {0}'.format(ws.resource_group), 'Location: {0}'.format(ws.location), 'Subscription Id: {0}'.format(ws.subscription_id), sep = '\n')

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


Name: FishyBusiness
Resource Group: FishyBusiness
Location: australiaeast
Subscription Id: 3191ba83-be2b-4b29-8409-f06e2fbb65bd


In [3]:
compute_name = 'CPU'

if compute_name in ws.compute_targets:
    compute_target = ws.compute_targets[compute_name]
    
    if compute_target and type(compute_target) is AmlCompute:
        print('Found compute target: ' + compute_name)
else:
    provisioning_configuration = AmlCompute.provisioning_configuration(vm_size = 'STANDARD_D2_V2',
                                                                min_nodes = 1,
                                                                max_nodes = 4)

    compute_target = ComputeTarget.create(ws, compute_name, provisioning_configuration)
    
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
    
    print(compute_target.status.serialize())

Found compute target: CPU


In [4]:
default_file_store = Datastore(ws, "workspacefilestore")

In [13]:
parent_folder = Path(os.getcwd()).parent

data_path = os.path.join(parent_folder, 'data')
raw_data_path = os.path.join(parent_folder, data_path, 'raw')
src_data_path = os.path.join(parent_folder, 'src')

source_directory = os.path.join(src_data_path, 'FishOrNoFish')

In [6]:
file_paths = []

for root, dirs, files in os.walk(raw_data_path):
    for file in files:
        if '.MP4' in file:
            file_paths.append(os.path.join(root, file))

In [7]:
for file_path in file_paths:
    print(file_path)
    target_path = os.sep.join(file_path.split(os.sep)[3:-1])
    print(target_path)
    default_file_store.upload_files([file_path], target_path=target_path, show_progress=True)

C:\Source\FishyBusiness\data\raw\Channels 2017\Mudginberri 2017\Transect 1\Location 1\GOPR0810.MP4
data\raw\Channels 2017\Mudginberri 2017\Transect 1\Location 1


In [8]:
#TODO: Multiple videos...

### Experiment

In [13]:
experiment_name = 'FishyBusiness'

experiment = Experiment(workspace=ws, name=experiment_name)
experiment.start_logging()

Experiment,Id,Type,Status,Details Page,Docs Page
FishyBusiness,acd88bf2-988a-42f6-8520-b04b2c2b06c2,,NotStarted,Link to Azure Portal,Link to Documentation


### Estimator

In [14]:
'''
script_params = {
    '--data-folder': ds.path('mnist').as_mount(),
    '--regularization': 0.05
}
'''

estimator = Estimator(source_directory=source_directory,
                      entry_script='extract_frames.py',
                      #script_params=script_params,
                      compute_target=compute_target)

In [15]:
run = experiment.submit(config=estimator)
run

Experiment,Id,Type,Status,Details Page,Docs Page
FishyBusiness,FishyBusiness_1558268790_60f8f4fa,azureml.scriptrun,Queued,Link to Azure Portal,Link to Documentation


In [16]:
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [17]:
run.wait_for_completion(show_output=True)

{'runId': 'FishyBusiness_1558268790_60f8f4fa',
 'target': 'CPU',
 'status': 'Finalizing',
 'startTimeUtc': '2019-05-19T12:26:44.131298Z',
 'properties': {'azureml.runsource': 'experiment',
  'ContentSnapshotId': 'de43764b-62ae-4991-a572-3c812b5becab',
  'azureml.git.repository_uri': 'https://stvanbod.visualstudio.com/FishyBusiness/_git/FishyBusiness',
  'azureml.git.branch': 'master',
  'azureml.git.commit': '2797d59c6ef882685bb37a36bb570c873df2453a',
  'azureml.git.dirty': 'True',
  'azureml.git.build_id': None,
  'azureml.git.build_uri': None,
  'mlflow.source.git.branch': 'master',
  'mlflow.source.git.commit': '2797d59c6ef882685bb37a36bb570c873df2453a',
  'mlflow.source.git.repoURL': 'https://stvanbod.visualstudio.com/FishyBusiness/_git/FishyBusiness'},
 'runDefinition': {'script': 'extract_frames.py',
  'arguments': [],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'CPU',
  'dataReferences': {},
  'jobName': None,
  'autoPrepare

### Pipeline

In [14]:
videos = DataReference(datastore=default_file_store,
                       data_reference_name='raw_data',
                       path_on_datastore=target_path)

In [15]:
frames = PipelineData('raw_frames', datastore=default_file_store)

In [16]:
conda_dependencies = CondaDependencies()
#conda_dependencies.add_conda_package('opencv')

run_configuration = RunConfiguration()
run_configuration.environment.docker.enabled = True
run_configuration.environment.docker.base_image = DEFAULT_CPU_IMAGE
run_configuration.environment.python.user_managed_dependencies = False
run_configuration.environment.python.conda_dependencies = conda_dependencies
run_configuration.target = compute_target

In [17]:
extract_frames_step = PythonScriptStep(name='extract_frames',
                                       source_directory=source_directory,
                                       script_name='extract_frames.py',
                                       #arguments=["--raw_data", videos, "--raw_frames", frames],
                                       #inputs=[videos],
                                       #outputs=[frames],
                                       runconfig=run_configuration,
                                       allow_reuse=False)

In [18]:
pipeline = Pipeline(workspace=ws, steps=[extract_frames_step])

In [19]:
pipeline_run = Experiment(ws, 'extract_frames').submit(pipeline)

Created step extract_frames [5b37ef5b][a697a70c-72d7-4148-ab95-91b0cc534256], (This step will run and generate new outputs)
Submitted pipeline run: 7d59a635-4a45-4601-bb3a-422e0cd7d742


In [20]:
RunDetails(pipeline_run).show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [22]:
pipeline_run.wait_for_completion(show_output=True)

PipelineRunId: 7d59a635-4a45-4601-bb3a-422e0cd7d742
Link to Portal: https://mlworkspace.azure.ai/portal/subscriptions/3191ba83-be2b-4b29-8409-f06e2fbb65bd/resourceGroups/FishyBusiness/providers/Microsoft.MachineLearningServices/workspaces/FishyBusiness/experiments/extract_frames/runs/7d59a635-4a45-4601-bb3a-422e0cd7d742
PipelineRun Status: Running


StepRunId: 7ab1ddfd-a28d-4c08-ad8d-da53c5a8c083
Link to Portal: https://mlworkspace.azure.ai/portal/subscriptions/3191ba83-be2b-4b29-8409-f06e2fbb65bd/resourceGroups/FishyBusiness/providers/Microsoft.MachineLearningServices/workspaces/FishyBusiness/experiments/extract_frames/runs/7ab1ddfd-a28d-4c08-ad8d-da53c5a8c083
StepRun( extract_frames ) Status: NotStarted

Streaming azureml-logs/80_driver_log.txt
In train.py
As a data scientist, this is where I use my training code.
Raw Data: None


The experiment completed successfully. Finalizing run...
Logging experiment finalizing status in history service
Cleaning up all outstanding Run operations

'Finished'

In [23]:
children = pipeline_run.get_children()
for child in children:
    status = child.get_status()
    print('Script:', child.name, 'status:', status)
    
    if status == "Failed":
        joblog = child.get_job_log()
        print('job log:', joblog)

Script: extract_frames status: Finished


### ScriptRunConfig

In [29]:
run_configuration.environment.docker.enabled = False
run_configuration.environment.python.user_managed_dependencies = True
run_configuration.target = compute_target

In [30]:
script_run_config = ScriptRunConfig(source_directory=source_directory,
                                    script='extract_frames.py',
                                    run_config=run_configuration,)

In [31]:
run = experiment.submit(script_run_config)
run

Experiment,Id,Type,Status,Details Page,Docs Page
FishyBusiness,FishyBusiness_1558269074_d346b7f4,azureml.scriptrun,Running,Link to Azure Portal,Link to Documentation


In [32]:
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [33]:
run.wait_for_completion(show_output=True)

RunId: FishyBusiness_1558269074_d346b7f4

Streaming azureml-logs/80_driver_log.txt

In train.py
As a data scientist, this is where I use my training code.


The experiment completed successfully. Finalizing run...
Logging experiment finalizing status in history service

Execution Summary
RunId: FishyBusiness_1558269074_d346b7f4

This compute target type doesn't support non-Docker runs; overriding run configuration enable Docker.




{'runId': 'FishyBusiness_1558269074_d346b7f4',
 'target': 'CPU',
 'status': 'Completed',
 'startTimeUtc': '2019-05-19T12:31:25.120638Z',
 'endTimeUtc': '2019-05-19T12:31:42.295479Z',
 'properties': {'azureml.runsource': 'experiment',
  'AzureML.DerivedImageName': 'azureml/azureml_446928f555a59c6b18e87593e8bdeb69',
  'ContentSnapshotId': 'de43764b-62ae-4991-a572-3c812b5becab',
  'azureml.git.repository_uri': 'https://stvanbod.visualstudio.com/FishyBusiness/_git/FishyBusiness',
  'azureml.git.branch': 'master',
  'azureml.git.commit': '2797d59c6ef882685bb37a36bb570c873df2453a',
  'azureml.git.dirty': 'True',
  'azureml.git.build_id': None,
  'azureml.git.build_uri': None,
  'mlflow.source.git.branch': 'master',
  'mlflow.source.git.commit': '2797d59c6ef882685bb37a36bb570c873df2453a',
  'mlflow.source.git.repoURL': 'https://stvanbod.visualstudio.com/FishyBusiness/_git/FishyBusiness'},
 'runDefinition': {'script': 'extract_frames.py',
  'arguments': [],
  'sourceDirectoryDataStore': None,
