## 0. Setup

In [19]:
# Render matplotlib figures inline in the notebook's cell output.
%matplotlib inline

In [1]:
import azureml.core
from azureml.core import Workspace
from azureml.core import Experiment
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

In [2]:
# Load the workspace configuration from the config.json file in the current folder.
ws = Workspace.from_config()
print(ws.name, ws.location, ws.resource_group, ws.location, sep = '\t')

# Experiment under which the training runs of this notebook are recorded.
experiment_name = 'mxk-train'
# Relative path: it is resolved against the working directory at the time an
# estimator is built, and a later cell changes the working directory.
script_folder = './'
exp = Experiment(workspace=ws, name=experiment_name)

# Name and sizing of the GPU cluster used for training.
compute_name = "gpucluster"
compute_min_nodes = 0
compute_max_nodes = 4
vm_size = "STANDARD_NC6"

# Reuse the cluster when the workspace already has it, otherwise provision it.
# Indexing ws.compute_targets directly raises KeyError when the cluster is
# missing, so membership is checked first.
if compute_name in ws.compute_targets:
    compute_target = ws.compute_targets[compute_name]
    if compute_target and type(compute_target) is AmlCompute:
        print('found compute target. just use it. ' + compute_name)
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=compute_min_nodes,
        max_nodes=compute_max_nodes)
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)
    # Block until provisioning has finished so later cells can submit to it.
    compute_target.wait_for_completion(show_output=True)

# Default datastore of the workspace; the training data is read from it.
ds = ws.get_default_datastore()
print(ds.datastore_type, ds.account_name, ds.container_name)

Found the config file in: C:\Users\makayser\Desktop\git\mxk_retinanet\notebook\aml_config\config.json
mak-ml	westeurope	makshared	westeurope
found compute target. just use it. gpucluster
AzureBlob makml9496683038 azureml-blobstore-43aa3424-3674-489b-808b-1e49daacf13c


In [3]:
# One-off upload of the local ./bin folder to the default datastore; kept
# commented out so that re-running the notebook does not upload again.
# NOTE(review): this targets 'mxk-train' while the training parameters mount
# ds.path('mxk') - confirm which datastore path is intended.
# ds.upload(src_dir='./bin', target_path='mxk-train', overwrite=True, show_progress=True)

## 1. Deploy Training

In [4]:
import os

# Switch to the sibling 'code' folder that holds the training sources.
# The target is relative, so an unguarded chdir is not idempotent: running the
# cell a second time would try to leave 'code' for '../code' again.
if os.path.basename(os.getcwd()) != 'code':
    os.chdir('../code')

In [5]:
# Load IPython's autoreload extension and reload imported modules before each
# cell executes, so edits to local .py files are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [20]:
# Command-line arguments handed to the training entry script.
script_params = {
    # Training data: the 'mxk' path of the default datastore, mounted on the node.
    '--data-dir': ds.path('mxk').as_mount(),

    # Training schedule.
    '--epochs': 1,
    '--steps': 10,

    # Presumably the focal-loss gamma/alpha - confirm against train.py.
    '--fl-gamma': 1.5,
    '--fl-alpha': 0.25,

    # Initial weights and backbone.
    '--weights': 'model/resnet50_coco_best_v2.1.0.h5',
    '--backbone': 'resnet50',

    # Annotation and class-mapping files.
    '--annotations': 'train_set_v2_retina.csv',
    '--classes': 'classes.csv',

    # Validation annotations are currently disabled:
    # '--val-annotations': 'test_set_v2_retina.csv',
}

In [21]:
# Display the argument mapping that is passed to the training script.
script_params

{'--data-dir': $AZUREML_DATAREFERENCE_464c5c466e454036969974d53799f337,
 '--epochs': 1,
 '--steps': 10,
 '--fl-gamma': 1.5,
 '--fl-alpha': 0.25,
 '--weights': 'model/resnet50_coco_best_v2.1.0.h5',
 '--backbone': 'resnet50',
 '--annotations': 'train_set_v2_retina.csv',
 '--classes': 'classes.csv'}

In [54]:
from azureml.train.dnn import TensorFlow

# TensorFlow estimator variant: only 'keras' is added as a conda package here.
# Other packages tried with this variant:
#   'opencv=3.4.2', 'mesa-libgl-cos6-x86_64', 'pillow', 'progressbar2'
est = TensorFlow(
    source_directory=script_folder,
    entry_script='train.py',
    script_params=script_params,
    compute_target=compute_target,
    conda_packages=['keras'],
    use_gpu=True,
)

In [59]:
# Imported here so the cell also runs on a fresh kernel; otherwise the name
# only exists once the RunConfiguration cell further down has been executed.
from azureml.train.estimator import Estimator

# Generic estimator variant: the full dependency list, including the pinned
# tensorflow-gpu, is installed as conda packages.
est = Estimator(source_directory = script_folder,
                entry_script = "train.py",
                compute_target = compute_target,
                script_params = script_params,
                conda_packages = ["opencv=3.4.2", "pillow", "mesa-libgl-cos6-x86_64",
                                  "tensorflow-gpu==1.12", "keras", "h5py",
                                  "progressbar2"],
                use_gpu = True)


In [48]:
from azureml.core import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_GPU_IMAGE
from azureml.train.estimator import Estimator

# Conda specification for the run: channels first, then the conda packages,
# then the pip-only extras. 'tensorflow-gpu==1.12' is deliberately not listed.
cd = CondaDependencies()
for channel in ('conda-forge', 'anaconda'):
    cd.add_channel(channel)
conda_pkgs = [
    'opencv=3.4.2',
    'h5py',
    'mesa-libgl-cos6-x86_64',
    'pillow',
    'six',
    'progressbar2',
    'keras',
]
for conda_pkg in conda_pkgs:
    cd.add_conda_package(conda_pkg)
cd.add_pip_package('opencv-python-headless')
print(cd.serialize_to_string())

# Run configuration: Docker-based execution on the default GPU base image,
# using the conda specification assembled above.
rc = RunConfiguration(framework='TensorFlow')
env = rc.environment
env.python.conda_dependencies = cd
env.docker.enabled = True
env.docker.base_image = DEFAULT_GPU_IMAGE
env.docker.gpu_support = True

# Estimator variant driven by the explicit environment definition rather than
# by the conda_packages / use_gpu arguments.
est = Estimator(
    source_directory=script_folder,
    entry_script='train.py',
    script_params=script_params,
    compute_target=compute_target,
    environment_definition=env,
)


# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

- pip:
    # Required packages for AzureML execution, history, and data preparation.
  - azureml-defaults
  - opencv-python-headless
- opencv=3.4.2
- h5py
- mesa-libgl-cos6-x86_64
- pillow
- six
- progressbar2
- keras
channels:
- conda-forge
- anaconda



In [60]:
# Submit the estimator to the experiment and keep a handle on the started run.
# NOTE: three cells above assign `est`; whichever of them was executed most
# recently is the one submitted here.
run = exp.submit(est)

In [61]:
from azureml.widgets import RunDetails

# Show the run-monitoring widget for the submitted run.
run_widget = RunDetails(run)
run_widget.show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [62]:
# Display the run summary (experiment, id, type, status and portal links).
run

Experiment,Id,Type,Status,Details Page,Docs Page
mxk-train,mxk-train_1552242060_eb73ddfa,azureml.scriptrun,Starting,Link to Azure Portal,Link to Documentation


In [None]:
# Block until the run has finished, streaming its logs into the cell output.
run.wait_for_completion(show_output=True)

RunId: mxk-train_1552242060_eb73ddfa

Streaming azureml-logs/20_image_build_log.txt

2019/03/10 18:21:08 Using acb_vol_fe09d3c6-a606-45de-bd72-789394b4d36d as the home volume
2019/03/10 18:21:08 Creating Docker network: acb_default_network, driver: 'bridge'
2019/03/10 18:21:08 Successfully set up Docker network: acb_default_network
2019/03/10 18:21:08 Setting up Docker configuration...
2019/03/10 18:21:09 Successfully set up Docker configuration
2019/03/10 18:21:09 Logging in to registry: makml4077955604.azurecr.io
2019/03/10 18:21:10 Successfully logged into makml4077955604.azurecr.io
2019/03/10 18:21:10 Executing step ID: acb_step_0. Working directory: '', Network: 'acb_default_network'
2019/03/10 18:21:10 Obtaining source code and scanning for dependencies...
2019/03/10 18:21:11 Successfully obtained source code and scanned for dependencies
2019/03/10 18:21:11 Launching container with name: acb_step_0
Sending build context to Docker daemon  40.96kB

Step 1/13 : FROM mcr.microsoft.co


mkl-2019.1           | 204.6 MB  |            |   0% [0m[91m
mkl-2019.1           | 204.6 MB  | 2          |   3% [0m[91m
mkl-2019.1           | 204.6 MB  | 6          |   6% [0m[91m
mkl-2019.1           | 204.6 MB  | 9          |  10% [0m[91m
mkl-2019.1           | 204.6 MB  | #3         |  13% [0m[91m
mkl-2019.1           | 204.6 MB  | #6         |  17% [0m[91m
mkl-2019.1           | 204.6 MB  | #9         |  20% [0m[91m
mkl-2019.1           | 204.6 MB  | ##3        |  24% [0m[91m
mkl-2019.1           | 204.6 MB  | ##7        |  27% [0m[91m
mkl-2019.1           | 204.6 MB  | ###        |  30% [0m[91m
mkl-2019.1           | 204.6 MB  | ###4       |  34% [0m[91m
mkl-2019.1           | 204.6 MB  | ###7       |  38% [0m[91m
mkl-2019.1           | 204.6 MB  | ####1      |  41% [0m[91m
mkl-2019.1           | 204.6 MB  | ####4      |  45% [0m[91m
mkl-2019.1           | 204.6 MB  | ####8      |  48% [0m[91m
mkl-2019.1           | 204.6 MB  | #####1     |  52% 


icu-58.2             | 22.5 MB   |            |   0% [0m[91m
icu-58.2             | 22.5 MB   | #8         |  19% [0m[91m
icu-58.2             | 22.5 MB   | ####8      |  49% [0m[91m
icu-58.2             | 22.5 MB   | #######5   |  75% [0m[91m
icu-58.2             | 22.5 MB   | ########9  |  89% [0m[91m
icu-58.2             | 22.5 MB   | #########9 | 100% [0m[91m
icu-58.2             | 22.5 MB   | ########## | 100% [0m[91m

ffmpeg-4.0           | 73.7 MB   |            |   0% [0m[91m
ffmpeg-4.0           | 73.7 MB   | 7          |   7% [0m[91m
ffmpeg-4.0           | 73.7 MB   | #6         |  16% [0m[91m
ffmpeg-4.0           | 73.7 MB   | ##5        |  25% [0m[91m
ffmpeg-4.0           | 73.7 MB   | ###4       |  35% [0m[91m
ffmpeg-4.0           | 73.7 MB   | ####3      |  44% [0m[91m
ffmpeg-4.0           | 73.7 MB   | #####3     |  54% [0m[91m
ffmpeg-4.0           | 73.7 MB   | ######3    |  63% [0m[91m
ffmpeg-4.0           | 73.7 MB   | #######3   |  73% 