## 0. Setup

In [1]:
import azureml.core
from azureml.core import Workspace
from azureml.core import Experiment
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

In [2]:
# Load the workspace configuration from the config.json file in the current folder.
ws = Workspace.from_config()
print(ws.name, ws.location, ws.resource_group, ws.location, sep = '\t')
experiment_name = 'mxk-train'
script_folder = './'
exp = Experiment(workspace=ws, name=experiment_name)

# Name and sizing of the GPU cluster used for training.
compute_name = "gpucluster"
compute_min_nodes = 0
compute_max_nodes = 4
vm_size = "STANDARD_NC6"

# Look the cluster up with .get(): indexing ws.compute_targets directly raises
# KeyError when the cluster does not exist yet, which made the check below pointless
# and left the sizing settings above unused.
compute_target = ws.compute_targets.get(compute_name)
if compute_target is None:
    # No such cluster in this workspace: provision it from the settings above.
    print('creating a new compute target: ' + compute_name)
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=compute_min_nodes,
        max_nodes=compute_max_nodes,
    )
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)
    # Block until provisioning has finished so later cells can submit to the cluster.
    compute_target.wait_for_completion(show_output=True)
elif isinstance(compute_target, AmlCompute):
    print('found compute target. just use it. ' + compute_name)
# A target of another type with this name is used as-is, as before.

# Default datastore of the workspace; later cells mount data from it.
ds = ws.get_default_datastore()
print(ds.datastore_type, ds.account_name, ds.container_name)

Found the config file in: C:\Users\makayser\Desktop\git\mxk_retinanet\notebook\aml_config\config.json
mak-ml	westeurope	makshared	westeurope
found compute target. just use it. gpucluster
AzureBlob makml9496683038 azureml-blobstore-43aa3424-3674-489b-808b-1e49daacf13c


In [49]:
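# Upload of the local ./bin folder to the datastore (target path 'mxk-train') is currently disabled.
# The log below was presumably recorded while this line was still active; uncomment to re-upload.
# ds.upload(src_dir='./bin', target_path='mxk-train', overwrite=True, show_progress=True)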

Uploading ./bin\__init__.py
Uploading ./bin\__pycache__\__init__.cpython-36.pyc
Uploading ./bin\__pycache__\train.cpython-36.pyc
Uploading ./bin\convert_model.py
Uploading ./bin\debug.py
Uploading ./bin\evaluate.py
Uploaded ./bin\evaluate.py, 1 files out of an estimated total of 6
Uploaded ./bin\debug.py, 2 files out of an estimated total of 6
Uploaded ./bin\__pycache__\__init__.cpython-36.pyc, 3 files out of an estimated total of 6
Uploaded ./bin\convert_model.py, 4 files out of an estimated total of 6
Uploaded ./bin\__init__.py, 5 files out of an estimated total of 6
Uploaded ./bin\__pycache__\train.cpython-36.pyc, 6 files out of an estimated total of 6


$AZUREML_DATAREFERENCE_912b6e41e21b484084a14416f983d2ac

## Deploy Training

In [3]:
import os

# Remember the directory the notebook started in the first time this cell runs.
# A bare os.chdir('../code') is relative to the *current* directory, so re-running
# the cell would climb one level further (or fail); anchoring to NOTEBOOK_DIR makes
# the cell safe to execute repeatedly.
if 'NOTEBOOK_DIR' not in globals():
    NOTEBOOK_DIR = os.getcwd()

# Work from the sibling 'code' folder; script_folder ('./') is resolved against it.
os.chdir(os.path.join(NOTEBOOK_DIR, '..', 'code'))

In [5]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before
# each cell executes, so edits to imported source files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [58]:
# Command-line arguments forwarded to the entry script.

# Data location: the 'mxk' folder of the datastore, mounted on the compute node.
script_params = {'--data-dir': ds.path('mxk').as_mount()}

# Training schedule and focal-loss settings.
script_params.update({
    '--epochs': 1,
    '--steps': 2300,
    '--fl-gamma': 1.5,
    '--fl-alpha': 0.25,
})

# Model and annotation files.
# NOTE(review): these paths start with '/', presumably resolved by the training
# script relative to --data-dir — confirm against train.py.
script_params.update({
    '--weights': '/model/resnet50_coco_best_v2.1.0.h5',
    '--backbone': 'resnet50',
    '--annotations': '/train_set_v2_retina.csv',
    '--classes': '/classes.csv',
})

In [59]:
# Echo the assembled arguments for a visual check.
script_params

{'--data-dir': $AZUREML_DATAREFERENCE_ce01115d841443ae8e1befaabbcad385,
 '--epochs': 1,
 '--steps': 2300,
 '--fl-gamma': 1.5,
 '--fl-alpha': 0.25,
 '--weights': '/model/resnet50_coco_best_v2.1.0.h5',
 '--backbone': 'resnet50',
 '--annotations': '/train_set_v2_retina.csv',
 '--classes': '/classes.csv'}

In [60]:
from azureml.core import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_GPU_IMAGE
from azureml.train.estimator import Estimator

# --- Python dependencies for the training container --------------------------
# Channels are registered first, then conda packages, then pip packages.
cd = CondaDependencies()

for channel in ('conda-forge', 'anaconda'):
    cd.add_channel(channel)

for conda_pkg in (
    'opencv=3.4.2',
    'tensorflow-gpu',
    'h5py',
    'mesa-libgl-cos6-x86_64',
    'pillow',
    'six',
    'progressbar2',
    'keras',
):
    cd.add_conda_package(conda_pkg)

for pip_pkg in ('opencv-python-headless',):
    cd.add_pip_package(pip_pkg)

# Show the resulting conda environment specification.
print(cd.serialize_to_string())

# --- Run configuration: GPU-enabled Docker image with the environment above ---
rc = RunConfiguration()
rc.environment.python.conda_dependencies = cd

docker_section = rc.environment.docker
docker_section.enabled = True
docker_section.gpu_support = True
docker_section.base_image = DEFAULT_GPU_IMAGE

# --- Estimator: runs train.py from script_folder on the compute target --------
est = Estimator(
    source_directory=script_folder,
    script_params=script_params,
    compute_target=compute_target,
    entry_script='train.py',
    environment_definition=rc.environment,
)


# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

- pip:
    # Required packages for AzureML execution, history, and data preparation.
  - azureml-defaults
  - opencv-python-headless
- opencv=3.4.2
- tensorflow-gpu
- h5py
- mesa-libgl-cos6-x86_64
- pillow
- six
- progressbar2
- keras
channels:
- conda-forge
- anaconda



In [23]:
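# NOTE: everything in this cell is commented out. It is an earlier draft based on the
# TensorFlow estimator and is not executed; the Estimator built in the cell above is what gets submitted.
# from azureml.train.dnn import TensorFlow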

# # script_params = {
# #     '--epochs':5, 
# #     '--steps':2300,
# #     '--fl-gamma':1.5,
# #     '--fl-alpha':0.25,
# #     '--weights': ds.path('mxk/model/resnet50_coco_best_v2.1.0.h5').as_mount(),
# #     '--backbone' : 'resnet50',
# #     '--snapshot-path':ds.path('mxk/').as_mount(),
# #     '--random-transform':True,
# #     'csv':True, 
# #     'annotations':ds.path('mxk/train_set_v2_retina.csv').as_mount(),
# #     'classes':ds.path('mxk/classes.csv').as_mount()
# # }


# # '--epochs':5, 
# # '--steps':2300,
# # '--fl-gamma':1.5,
# # '--fl-alpha':0.25,
# # '--weights':'../assets/resnet50_coco_best_v2.1.0.h5', #ds.path('mxk/weights/').as_mount()
# # '--backbone' : 'resnet50',
# # '--snapshot-path':'../assets/run_1',#ds.path('mxk/weights/').as_mount()
# # '--random-transform',
# # 'csv', 
# # '--annotations':'../assets/train_set_v2_retina.csv', #ds.path('mxk/').as_mount()
# # '--classes':'../assets/classes.csv'#ds.path('mxk/').as_mount() 

    
# est = TensorFlow(source_directory=script_folder,
#                  script_params=script_params,
#                  compute_target=compute_target, 
#                  conda_packages=['keras'],#,'h5py','pillow','opencv','progressbar2'],
#                  entry_script='train.py', 
#                  vm_priority='lowpriority',
#                  use_gpu=True)

In [61]:
# Submit the estimator to the experiment and keep the returned run handle.
run = exp.submit(est)

In [62]:
from azureml.widgets import RunDetails
# Render the run-details widget for the submitted run.
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [63]:
# Display the run summary (experiment, id, type, status and links).
run

Experiment,Id,Type,Status,Details Page,Docs Page
mxk-train,mxk-train_1552170364_8e2a139e,azureml.scriptrun,Starting,Link to Azure Portal,Link to Documentation


In [None]:
# Block until the run finishes, streaming its logs into the cell output.
run.wait_for_completion(show_output=True)

RunId: mxk-train_1552170364_8e2a139e

Streaming azureml-logs/20_image_build_log.txt

2019/03/09 22:26:15 Using acb_vol_db96adae-fcd9-4171-94ff-1f02734c4cfc as the home volume
2019/03/09 22:26:15 Creating Docker network: acb_default_network, driver: 'bridge'
2019/03/09 22:26:16 Successfully set up Docker network: acb_default_network
2019/03/09 22:26:16 Setting up Docker configuration...
2019/03/09 22:26:17 Successfully set up Docker configuration
2019/03/09 22:26:17 Logging in to registry: makml4077955604.azurecr.io
2019/03/09 22:26:18 Successfully logged into makml4077955604.azurecr.io
2019/03/09 22:26:18 Executing step ID: acb_step_0. Working directory: '', Network: 'acb_default_network'
2019/03/09 22:26:18 Obtaining source code and scanning for dependencies...
2019/03/09 22:26:18 Successfully obtained source code and scanned for dependencies
2019/03/09 22:26:18 Launching container with name: acb_step_0
Sending build context to Docker daemon  40.96kB

Step 1/13 : FROM mcr.microsoft.co


xorg-inputproto-2.3. | 18 KB     |            |   0% [0m[91m
xorg-inputproto-2.3. | 18 KB     | ########## | 100% [0m[91m

openh264-1.8.0       | 1.4 MB    |            |   0% [0m[91m
openh264-1.8.0       | 1.4 MB    | ########1  |  82% [0m[91m
openh264-1.8.0       | 1.4 MB    | ########## | 100% [0m[91m

certifi-2019.3.9     | 149 KB    |            |   0% [0m[91m
certifi-2019.3.9     | 149 KB    | ########## | 100% [0m[91m

grpcio-1.16.0        | 1.0 MB    |            |   0% [0m[91m
grpcio-1.16.0        | 1.0 MB    | ########5  |  86% [0m[91m
grpcio-1.16.0        | 1.0 MB    | ########## | 100% [0m[91m

gxx_linux-64-7.3.0   | 9 KB      |            |   0% [0m[91m
gxx_linux-64-7.3.0   | 9 KB      | ########## | 100% [0m[91m

xorg-renderproto-0.1 | 8 KB      |            |   0% [0m[91m
xorg-renderproto-0.1 | 8 KB      | ########## | 100% [0m[91m

astor-0.7.1          | 22 KB     |            |   0% [0m[91m
astor-0.7.1          | 22 KB     | ########## | 