## 0. Setup

In [19]:
%matplotlib inline

In [1]:
import os

import azureml.core
from azureml.core import Experiment
from azureml.core import Workspace
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.train.estimator import Estimator
from azureml.train.hyperdrive import PrimaryMetricGoal
from azureml.train.hyperdrive import choice
from azureml.train.hyperdrive import uniform

In [2]:
# Load the workspace configuration from the config.json file in the current folder.
ws = Workspace.from_config()
print(ws.name, ws.location, ws.resource_group, ws.location, sep = '\t')
experiment_name = 'mxk'
script_folder = './'
exp = Experiment(workspace=ws, name=experiment_name)

# Choose a name and size for the training cluster.
compute_name = "gpucluster"
compute_min_nodes = 0
compute_max_nodes = 4
vm_size = "STANDARD_NC6"

# Reuse the cluster when the workspace already has one under this name;
# otherwise provision it, so a fresh workspace does not fail with a KeyError.
if compute_name in ws.compute_targets:
    compute_target = ws.compute_targets[compute_name]
    if compute_target and type(compute_target) is AmlCompute:
        print('found compute target. just use it. ' + compute_name)
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=compute_min_nodes,
        max_nodes=compute_max_nodes,
    )
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)
    # Wait for provisioning to finish before any run is submitted to the cluster.
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

# The workspace's default datastore is where the training data is read from.
ds = ws.get_default_datastore()
print(ds.datastore_type, ds.account_name, ds.container_name)

Found the config file in: C:\Users\makayser\Desktop\git\mxk_retinanet\notebook\aml_config\config.json
mak-ml	westeurope	makshared	westeurope
found compute target. just use it. gpucluster
AzureBlob makml9496683038 azureml-blobstore-43aa3424-3674-489b-808b-1e49daacf13c


In [3]:
# ds.upload(src_dir='./bin', target_path='mxk-train', overwrite=True, show_progress=True)

## 1. Deploy Training

In [4]:
import os

# Switch the working directory to the training-code folder.
# NOTE(review): `script_folder` is './', so the estimator presumably picks up
# its sources from whatever the working directory is here — confirm.
# The guard keeps the cell idempotent: re-running it must not attempt a second
# relative chdir from inside the 'code' folder.
if os.path.basename(os.getcwd()) != 'code':
    os.chdir('../code')

In [5]:
%load_ext autoreload
%autoreload 2

In [88]:
# Command-line arguments handed to the training entry script.
# The data directory is a mount reference into the 'mxk' folder of the datastore.
mounted_data_dir = ds.path('mxk').as_mount()

script_params = {
    '--data-dir': mounted_data_dir,
    # training schedule
    '--epochs': 4,
    '--steps': 20,
    # focal-loss settings
    '--fl-gamma': 1.5,
    '--fl-alpha': 0.25,
    # model initialisation
    '--weights': 'model/resnet50_coco_best_v2.1.0.h5',
    '--backbone': 'resnet50',
    # annotation / class files
    '--annotations': 'train_set_v2_retina.csv',
    '--classes': 'classes.csv',
    '--val-annotations': 'test_set_v2_retina.csv',
    # value-less switch: the empty string means only the flag itself is given
    '--no-snapshots': '',
}

In [89]:
# Show the resolved arguments; the data directory appears as a data-reference placeholder.
script_params

{'--data-dir': $AZUREML_DATAREFERENCE_1f803dc832f0452b9c0a3f1dbea43898,
 '--epochs': 4,
 '--steps': 20,
 '--fl-gamma': 1.5,
 '--fl-alpha': 0.25,
 '--weights': 'model/resnet50_coco_best_v2.1.0.h5',
 '--backbone': 'resnet50',
 '--annotations': 'train_set_v2_retina.csv',
 '--classes': 'classes.csv',
 '--val-annotations': 'test_set_v2_retina.csv',
 '--no-snapshots': ''}

In [None]:
from azureml.train.hyperdrive import RandomParameterSampling
from azureml.train.hyperdrive import BanditPolicy

In [None]:
# Budget for the hyperparameter sweep.
# No trailing comma: `max_total_runs=30,` would bind the tuple (30,) instead of an int.
max_total_runs = 30
max_concurrent_runs = 4

In [None]:
# Metric the sweep optimises, and the direction of optimisation.
# No trailing comma: `primary_metric_name="EAD_Score",` would bind a 1-tuple instead of a string.
# NOTE(review): presumably the training script logs a metric under exactly this name — confirm.
primary_metric_name = "EAD_Score"
primary_metric_goal = PrimaryMetricGoal.MAXIMIZE

In [None]:

# Search space for random sampling.
# The focal-loss keys use the same hyphenated spelling as `script_params`
# ('--fl-gamma', '--fl-alpha') so that both cells address the same arguments.
# NOTE(review): '--learning_rate' and '--batch_size' do not appear in
# `script_params`; confirm their spelling against the training script's argument parser.
# NOTE(review): '--fl-gamma' / '--fl-alpha' are also given fixed values in
# `script_params`; presumably those should be dropped there when sweeping — confirm.
param_sampling = RandomParameterSampling( {
        "--learning_rate": uniform(1e-6, 1e-04),
        "--batch_size": choice(1, 4, 8, 16, 32),
        "--fl-gamma": choice(0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5),
        "--fl-alpha": choice(0.25, 0.5, 0.75, 1)
    }
)

# Early termination via a bandit policy:
#   slack_factor        - slack allowed with respect to the best performing training run
#   evaluation_interval - how often the policy is applied
#   delay_evaluation    - number of intervals to skip before the first evaluation
early_termination_policy = BanditPolicy(slack_factor = 0.25, evaluation_interval=1, delay_evaluation=4)

In [90]:
# Packages to install into the remote training environment.
pip_requirements = ["tensorflow-gpu==1.12", "keras", "h5py", "progressbar2", "opencv-python-headless"]
conda_requirements = ['opencv', 'h5py','mesa-libgl-cos6-x86_64', 'pillow']

# Estimator describing the training job: which script to run, with which
# arguments, on which compute target, and with which dependencies.
est = Estimator(
    source_directory=script_folder,
    entry_script="train.py",
    script_params=script_params,
    compute_target=compute_target,
    use_gpu=True,
    pip_packages=pip_requirements,
    conda_packages=conda_requirements,
)


In [None]:
from azureml.train.hyperdrive import HyperDriveRunConfig

# Bundle the estimator with the search space, the early-termination policy,
# the optimisation target and the run budget defined in the cells above.
# The goal is taken from `primary_metric_goal` (rather than hard-coded) so that
# changing it in its defining cell actually takes effect here.
# NOTE(review): the submit cell passes `est` to `exp.submit`, so this
# configuration is built but never submitted; submit `hyperdrive_run_config`
# instead to actually run the sweep.
hyperdrive_run_config = HyperDriveRunConfig(estimator=est,
                          hyperparameter_sampling=param_sampling,
                          policy=early_termination_policy,
                          primary_metric_name=primary_metric_name,
                          primary_metric_goal=primary_metric_goal,
                          max_total_runs=max_total_runs,
                          max_concurrent_runs=max_concurrent_runs)

In [91]:
# Submit the single (non-swept) estimator job to the experiment.
run = exp.submit(config=est)

In [92]:
from azureml.widgets import RunDetails

# Render the monitoring widget for the submitted run.
run_widget = RunDetails(run)
run_widget.show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [93]:
# Display the run summary (experiment, id, type, status, portal and docs links).
run

Experiment,Id,Type,Status,Details Page,Docs Page
mxk-train,mxk-train_1552250362_1fdf09c8,azureml.scriptrun,Queued,Link to Azure Portal,Link to Documentation


In [94]:
# Wait for the run to finish, streaming its log files into the cell output.
run.wait_for_completion(show_output=True)

RunId: mxk-train_1552250362_1fdf09c8

Streaming azureml-logs/60_control_log.txt

Streaming log file azureml-logs/60_control_log.txt

Streaming azureml-logs/80_driver_log.txt

Using TensorFlow backend.
2019-03-10 20:46:16.643694: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2019-03-10 20:46:16.878169: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties: 
name: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.8235
pciBusID: 4c0f:00:00.0
totalMemory: 11.17GiB freeMemory: 11.10GiB
2019-03-10 20:46:16.878207: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0
2019-03-10 20:46:17.158016: I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-03-10 20:46:17.158063: I tensorflow/core/common_runtime/gpu/gpu_device.cc:988]      0 
2019-03-10 20:46:17.158

{'runId': 'mxk-train_1552250362_1fdf09c8',
 'target': 'gpucluster',
 'status': 'Failed',
 'startTimeUtc': '2019-03-10T20:43:42.271157Z',
 'endTimeUtc': '2019-03-10T20:51:30.698204Z',
 'properties': {'azureml.runsource': 'experiment',
  'ContentSnapshotId': '1246f492-86e6-492e-9084-24f479519306'},
 'runDefinition': {'Script': 'train.py',
  'Arguments': ['--data-dir',
   '$AZUREML_DATAREFERENCE_1f803dc832f0452b9c0a3f1dbea43898',
   '--epochs',
   '4',
   '--steps',
   '20',
   '--fl-gamma',
   '1.5',
   '--fl-alpha',
   '0.25',
   '--weights',
   'model/resnet50_coco_best_v2.1.0.h5',
   '--backbone',
   'resnet50',
   '--annotations',
   'train_set_v2_retina.csv',
   '--classes',
   'classes.csv',
   '--val-annotations',
   'test_set_v2_retina.csv',
   '--no-snapshots'],
  'SourceDirectoryDataStore': None,
  'Framework': 0,
  'Communicator': 0,
  'Target': 'gpucluster',
  'DataReferences': {'1f803dc832f0452b9c0a3f1dbea43898': {'DataStoreName': 'workspaceblobstore',
    'Mode': 'Mount',
 

In [48]:
# from azureml.core import RunConfiguration
# from azureml.core.conda_dependencies import CondaDependencies
# from azureml.core.runconfig import DEFAULT_GPU_IMAGE

# cd = CondaDependencies()
# for ch in ['conda-forge','anaconda']:
#     cd.add_channel(ch)
# for pkg in ['opencv=3.4.2', 'h5py','mesa-libgl-cos6-x86_64', 'pillow', 'six', 'progressbar2',
#             'keras']: #'tensorflow-gpu==1.12',
#     cd.add_conda_package(pkg)
# for pkg in ['opencv-python-headless']:
#     cd.add_pip_package(pkg)
# print(cd.serialize_to_string())

# rc = RunConfiguration(framework='TensorFlow')

# rc.environment.python.conda_dependencies = cd
# rc.environment.docker.enabled = True
# rc.environment.docker.base_image = azureml.core.runconfig.DEFAULT_GPU_IMAGE
# rc.environment.docker.gpu_support = True

# #'tensorflow-gpu==1.12',
# from azureml.train.estimator import Estimator
# est = Estimator(source_directory=script_folder,
#                 script_params=script_params,
#                 compute_target=compute_target,
#                entry_script='train.py',                
#                 environment_definition=rc.environment
# #                 conda_packages=['opencv=3.4.2', 'h5py','mesa-libgl-cos6-x86_64', 'pillow', 'six', 'progressbar2','keras'],
# #                 use_gpu=True
#                )


# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

- pip:
    # Required packages for AzureML execution, history, and data preparation.
  - azureml-defaults
  - opencv-python-headless
- opencv=3.4.2
- h5py
- mesa-libgl-cos6-x86_64
- pillow
- six
- progressbar2
- keras
channels:
- conda-forge
- anaconda



In [71]:
# est = Estimator(source_directory = script_folder,
#                 compute_target = compute_target,
#                 script_params = script_params,
#                 conda_packages = ['tensorflow-gpu==1.12','opencv=3.4.2', 'h5py','mesa-libgl-cos6-x86_64', 'pillow', 'progressbar2',
#                             'keras'],
#                 pip_packages=['opencv-python-headless'],
#                 custom_docker_image=azureml.core.runconfig.DEFAULT_GPU_IMAGE,
#                 use_gpu = True,
#                 entry_script = "train.py")


In [75]:
# NOTE(review): stale cell. Its execution count (75) predates the submit cell
# (91), and the recorded log below belongs to an earlier run with a different
# RunId. Re-running it waits on whatever `run` currently refers to.
run.wait_for_completion(show_output=True)

RunId: mxk-train_1552247946_5bff78fa

Streaming azureml-logs/20_image_build_log.txt

2019/03/10 19:59:15 Using acb_vol_2d0a29c8-cc1f-458b-ba79-c63b186d15d5 as the home volume
2019/03/10 19:59:15 Creating Docker network: acb_default_network, driver: 'bridge'
2019/03/10 19:59:16 Successfully set up Docker network: acb_default_network
2019/03/10 19:59:16 Setting up Docker configuration...
2019/03/10 19:59:16 Successfully set up Docker configuration
2019/03/10 19:59:16 Logging in to registry: makml4077955604.azurecr.io
2019/03/10 19:59:17 Successfully logged into makml4077955604.azurecr.io
2019/03/10 19:59:17 Executing step ID: acb_step_0. Working directory: '', Network: 'acb_default_network'
2019/03/10 19:59:17 Obtaining source code and scanning for dependencies...
2019/03/10 19:59:18 Successfully obtained source code and scanned for dependencies
2019/03/10 19:59:18 Launching container with name: acb_step_0
Sending build context to Docker daemon  40.96kB

Step 1/13 : FROM mcr.microsoft.co


pillow-5.4.1         | 627 KB    |            |   0% [0m[91m
pillow-5.4.1         | 627 KB    | ########7  |  87% [0m[91m
pillow-5.4.1         | 627 KB    | ########## | 100% [0m[91m

libdrm-cos6-x86_64-2 | 132 KB    |            |   0% [0m[91m
libdrm-cos6-x86_64-2 | 132 KB    | ########## | 100% [0m[91m

keras-preprocessing- | 52 KB     |            |   0% [0m[91m
keras-preprocessing- | 52 KB     | ########## | 100% [0m[91m

mkl_random-1.0.2     | 407 KB    |            |   0% [0m[91m
mkl_random-1.0.2     | 407 KB    | ########## | 100% [0m[91m

tensorflow-base-1.12 | 216.9 MB  |            |   0% [0m[91m
tensorflow-base-1.12 | 216.9 MB  | 2          |   2% [0m[91m
tensorflow-base-1.12 | 216.9 MB  | 5          |   5% [0m[91m
tensorflow-base-1.12 | 216.9 MB  | 8          |   8% [0m[91m
tensorflow-base-1.12 | 216.9 MB  | #1         |  12% [0m[91m
tensorflow-base-1.12 | 216.9 MB  | #4         |  15% [0m[91m
tensorflow-base-1.12 | 216.9 MB  | #7         |  1


mesa-libgl-cos6-x86_ | 179 KB    |            |   0% [0m[91m
mesa-libgl-cos6-x86_ | 179 KB    | ########## | 100% [0m[91m

pcre-8.42            | 251 KB    |            |   0% [0m[91m
pcre-8.42            | 251 KB    | ########## | 100% [0m[91m

numpy-1.16.2         | 49 KB     |            |   0% [0m[91m
numpy-1.16.2         | 49 KB     | ########## | 100% [0m[91m

fontconfig-2.13.0    | 291 KB    |            |   0% [0m[91m
fontconfig-2.13.0    | 291 KB    | ########## | 100% [0m[91m

hdf5-1.10.2          | 5.2 MB    |            |   0% [0m[91m
hdf5-1.10.2          | 5.2 MB    | #######6   |  77% [0m[91m
hdf5-1.10.2          | 5.2 MB    | #########  |  91% [0m[91m
hdf5-1.10.2          | 5.2 MB    | ########## | 100% [0m[91m

mkl_fft-1.0.10       | 170 KB    |            |   0% [0m[91m
mkl_fft-1.0.10       | 170 KB    | ########## | 100% 

libx11-cos6-x86_64-1 | 730 KB    |            |   0% [0m[91m
libx11-cos6-x86_64-1 | 730 KB    | #########5 |  96% [0m


libopencv-3.4.2      | 40.4 MB   |            |   0% [0m[91m
libopencv-3.4.2      | 40.4 MB   | #1         |  11% [0m[91m
libopencv-3.4.2      | 40.4 MB   | ##6        |  27% [0m[91m
libopencv-3.4.2      | 40.4 MB   | ####2      |  42% [0m[91m
libopencv-3.4.2      | 40.4 MB   | #####8     |  59% [0m[91m
libopencv-3.4.2      | 40.4 MB   | #######5   |  75% [0m[91m
libopencv-3.4.2      | 40.4 MB   | ########5  |  86% [0m[91m
libopencv-3.4.2      | 40.4 MB   | #########3 |  93% [0m[91m
libopencv-3.4.2      | 40.4 MB   | #########8 |  99% [0m[91m
libopencv-3.4.2      | 40.4 MB   | ########## | 100% [0m[91m

tensorflow-gpu-1.12. | 2 KB      |            |   0% [0m[91m
tensorflow-gpu-1.12. | 2 KB      | ########## | 100% [0m[91m

pluggy-0.9.0         | 30 KB     |            |   0% [0m[91m
pluggy-0.9.0         | 30 KB     | ########## | 100% [0m[91m

protobuf-3.6.1       | 616 KB    |            |   0% [0m[91m
protobuf-3.6.1       | 616 KB    | ########1  |  81


wheel-0.33.1         | 39 KB     |            |   0% [0m[91m
wheel-0.33.1         | 39 KB     | ########## | 100% [0m[91m

cudatoolkit-9.2      | 351.0 MB  |            |   0% [0m[91m
cudatoolkit-9.2      | 351.0 MB  | 1          |   1% [0m[91m
cudatoolkit-9.2      | 351.0 MB  | 2          |   3% [0m[91m
cudatoolkit-9.2      | 351.0 MB  | 4          |   5% [0m[91m
cudatoolkit-9.2      | 351.0 MB  | 6          |   7% [0m[91m
cudatoolkit-9.2      | 351.0 MB  | 8          |   8% [0m[91m
cudatoolkit-9.2      | 351.0 MB  | #          |  10% [0m[91m
cudatoolkit-9.2      | 351.0 MB  | #2         |  12% [0m[91m
cudatoolkit-9.2      | 351.0 MB  | #4         |  14% [0m[91m
cudatoolkit-9.2      | 351.0 MB  | #6         |  16% [0m[91m
cudatoolkit-9.2      | 351.0 MB  | #8         |  18% [0m[91m
cudatoolkit-9.2      | 351.0 MB  | ##         |  20% [0m[91m
cudatoolkit-9.2      | 351.0 MB  | ##2        |  22% [0m[91m
cudatoolkit-9.2      | 351.0 MB  | ##4        |  24% 


scipy-1.2.1          | 17.7 MB   |            |   0% [0m[91m
scipy-1.2.1          | 17.7 MB   | ##5        |  26% [0m[91m
scipy-1.2.1          | 17.7 MB   | ######7    |  68% [0m[91m
scipy-1.2.1          | 17.7 MB   | ########5  |  86% [0m[91m
scipy-1.2.1          | 17.7 MB   | #########8 |  99% [0m[91m
scipy-1.2.1          | 17.7 MB   | ########## | 100% [0m[91m

python-3.6.2         | 27.0 MB   |            |   0% [0m[91m
python-3.6.2         | 27.0 MB   | #9         |  19% [0m[91m
python-3.6.2         | 27.0 MB   | ####5      |  46% [0m[91m
python-3.6.2         | 27.0 MB   | #######3   |  73% [0m[91m
python-3.6.2         | 27.0 MB   | ########9  |  90% [0m[91m
python-3.6.2         | 27.0 MB   | ########## | 100% [0m[91m

libx11-common-cos6-x | 132 KB    |            |   0% [0m[91m
libx11-common-cos6-x | 132 KB    | #########6 |  96% [0m[91m
libx11-common-cos6-x | 132 KB    | ########## | 100% [0m[91m

h5py-2.8.0           | 1.1 MB    |            |   0

{'runId': 'mxk-train_1552247946_5bff78fa',
 'target': 'gpucluster',
 'status': 'Failed',
 'endTimeUtc': '2019-03-10T20:06:23.752241Z',
 'properties': {'azureml.runsource': 'experiment',
  'ContentSnapshotId': '7ee52c87-f0f8-4a9d-b0a1-983cc0b5e358'},
 'runDefinition': {'Script': 'train.py',
  'Arguments': ['--data-dir',
   '$AZUREML_DATAREFERENCE_464c5c466e454036969974d53799f337',
   '--epochs',
   '1',
   '--steps',
   '10',
   '--fl-gamma',
   '1.5',
   '--fl-alpha',
   '0.25',
   '--weights',
   'model/resnet50_coco_best_v2.1.0.h5',
   '--backbone',
   'resnet50',
   '--annotations',
   'train_set_v2_retina.csv',
   '--classes',
   'classes.csv'],
  'SourceDirectoryDataStore': None,
  'Framework': 0,
  'Communicator': 0,
  'Target': 'gpucluster',
  'DataReferences': {'464c5c466e454036969974d53799f337': {'DataStoreName': 'workspaceblobstore',
    'Mode': 'Mount',
    'PathOnDataStore': 'mxk',
    'PathOnCompute': None,
    'Overwrite': False}},
  'JobName': None,
  'AutoPrepareEnviron

In [63]:
# NOTE(review): stale cell. Its execution count (63) predates the submit cell
# (91), and the recorded log below belongs to an earlier run with a different
# RunId. Re-running it waits on whatever `run` currently refers to.
run.wait_for_completion(show_output=True)

RunId: mxk-train_1552242060_eb73ddfa

Streaming azureml-logs/20_image_build_log.txt

2019/03/10 18:21:08 Using acb_vol_fe09d3c6-a606-45de-bd72-789394b4d36d as the home volume
2019/03/10 18:21:08 Creating Docker network: acb_default_network, driver: 'bridge'
2019/03/10 18:21:08 Successfully set up Docker network: acb_default_network
2019/03/10 18:21:08 Setting up Docker configuration...
2019/03/10 18:21:09 Successfully set up Docker configuration
2019/03/10 18:21:09 Logging in to registry: makml4077955604.azurecr.io
2019/03/10 18:21:10 Successfully logged into makml4077955604.azurecr.io
2019/03/10 18:21:10 Executing step ID: acb_step_0. Working directory: '', Network: 'acb_default_network'
2019/03/10 18:21:10 Obtaining source code and scanning for dependencies...
2019/03/10 18:21:11 Successfully obtained source code and scanned for dependencies
2019/03/10 18:21:11 Launching container with name: acb_step_0
Sending build context to Docker daemon  40.96kB

Step 1/13 : FROM mcr.microsoft.co


mkl-2019.1           | 204.6 MB  |            |   0% [0m[91m
mkl-2019.1           | 204.6 MB  | 2          |   3% [0m[91m
mkl-2019.1           | 204.6 MB  | 6          |   6% [0m[91m
mkl-2019.1           | 204.6 MB  | 9          |  10% [0m[91m
mkl-2019.1           | 204.6 MB  | #3         |  13% [0m[91m
mkl-2019.1           | 204.6 MB  | #6         |  17% [0m[91m
mkl-2019.1           | 204.6 MB  | #9         |  20% [0m[91m
mkl-2019.1           | 204.6 MB  | ##3        |  24% [0m[91m
mkl-2019.1           | 204.6 MB  | ##7        |  27% [0m[91m
mkl-2019.1           | 204.6 MB  | ###        |  30% [0m[91m
mkl-2019.1           | 204.6 MB  | ###4       |  34% [0m[91m
mkl-2019.1           | 204.6 MB  | ###7       |  38% [0m[91m
mkl-2019.1           | 204.6 MB  | ####1      |  41% [0m[91m
mkl-2019.1           | 204.6 MB  | ####4      |  45% [0m[91m
mkl-2019.1           | 204.6 MB  | ####8      |  48% [0m[91m
mkl-2019.1           | 204.6 MB  | #####1     |  52% 


icu-58.2             | 22.5 MB   |            |   0% [0m[91m
icu-58.2             | 22.5 MB   | #8         |  19% [0m[91m
icu-58.2             | 22.5 MB   | ####8      |  49% [0m[91m
icu-58.2             | 22.5 MB   | #######5   |  75% [0m[91m
icu-58.2             | 22.5 MB   | ########9  |  89% [0m[91m
icu-58.2             | 22.5 MB   | #########9 | 100% [0m[91m
icu-58.2             | 22.5 MB   | ########## | 100% [0m[91m

ffmpeg-4.0           | 73.7 MB   |            |   0% [0m[91m
ffmpeg-4.0           | 73.7 MB   | 7          |   7% [0m[91m
ffmpeg-4.0           | 73.7 MB   | #6         |  16% [0m[91m
ffmpeg-4.0           | 73.7 MB   | ##5        |  25% [0m[91m
ffmpeg-4.0           | 73.7 MB   | ###4       |  35% [0m[91m
ffmpeg-4.0           | 73.7 MB   | ####3      |  44% [0m[91m
ffmpeg-4.0           | 73.7 MB   | #####3     |  54% [0m[91m
ffmpeg-4.0           | 73.7 MB   | ######3    |  63% [0m[91m
ffmpeg-4.0           | 73.7 MB   | #######3   |  73% 


yaml-0.1.7           | 85 KB     |            |   0% [0m[91m
yaml-0.1.7           | 85 KB     | ########## | 100% [0m[91m

opencv-3.4.2         | 11 KB     |            |   0% [0m[91m
opencv-3.4.2         | 11 KB     | ########## | 100% [0m[91m

libstdcxx-ng-8.2.0   | 2.9 MB    |            |   0% [0m[91m
libstdcxx-ng-8.2.0   | 2.9 MB    | #######7   |  78% [0m[91m
libstdcxx-ng-8.2.0   | 2.9 MB    | ########## | 100% [0m[91m

python-3.6.2         | 27.0 MB   |            |   0% [0m[91m
python-3.6.2         | 27.0 MB   | ##         |  20% [0m[91m
python-3.6.2         | 27.0 MB   | ####4      |  44% [0m[91m
python-3.6.2         | 27.0 MB   | #######2   |  73% [0m[91m
python-3.6.2         | 27.0 MB   | ########9  |  90% [0m[91m
python-3.6.2         | 27.0 MB   | ########## | 100% [0m[91m

scipy-1.2.1          | 17.7 MB   |            |   0% [0m[91m
scipy-1.2.1          | 17.7 MB   | ##2        |  23% [0m[91m
scipy-1.2.1          | 17.7 MB   | #####4     |  5


wheel-0.33.1         | 39 KB     |            |   0% [0m[91m
wheel-0.33.1         | 39 KB     | ########## | 100% [0m[91m

markdown-3.0.1       | 107 KB    |            |   0% [0m[91m
markdown-3.0.1       | 107 KB    | ########## | 100% [0m[91m

libxml2-2.9.9        | 2.0 MB    |            |   0% [0m[91m
libxml2-2.9.9        | 2.0 MB    | #######8   |  79% [0m[91m
libxml2-2.9.9        | 2.0 MB    | ########## | 100% [0m[91m

blas-1.0             | 6 KB      |            |   0% [0m[91m
blas-1.0             | 6 KB      | ########## | 100% [0m[91m

grpcio-1.14.1        | 1.0 MB    |            |   0% [0m[91m
grpcio-1.14.1        | 1.0 MB    | ########1  |  82% [0m[91m
grpcio-1.14.1        | 1.0 MB    | ########## | 100% [0m[91m

numpy-base-1.16.2    | 4.4 MB    |            |   0% [0m[91m
numpy-base-1.16.2    | 4.4 MB    | #######6   |  76% [0m[91m
numpy-base-1.16.2    | 4.4 MB    | #########5 |  95% [0m[91m
numpy-base-1.16.2    | 4.4 MB    | ########## | 1

{'runId': 'mxk-train_1552242060_eb73ddfa',
 'target': 'gpucluster',
 'status': 'Failed',
 'endTimeUtc': '2019-03-10T18:28:03.178255Z',
 'properties': {'azureml.runsource': 'experiment',
  'ContentSnapshotId': '6861d20d-eb6f-484c-8952-ae25ffb0fffe'},
 'runDefinition': {'Script': 'train.py',
  'Arguments': ['--data-dir',
   '$AZUREML_DATAREFERENCE_464c5c466e454036969974d53799f337',
   '--epochs',
   '1',
   '--steps',
   '10',
   '--fl-gamma',
   '1.5',
   '--fl-alpha',
   '0.25',
   '--weights',
   'model/resnet50_coco_best_v2.1.0.h5',
   '--backbone',
   'resnet50',
   '--annotations',
   'train_set_v2_retina.csv',
   '--classes',
   'classes.csv'],
  'SourceDirectoryDataStore': None,
  'Framework': 0,
  'Communicator': 0,
  'Target': 'gpucluster',
  'DataReferences': {'464c5c466e454036969974d53799f337': {'DataStoreName': 'workspaceblobstore',
    'Mode': 'Mount',
    'PathOnDataStore': 'mxk',
    'PathOnCompute': None,
    'Overwrite': False}},
  'JobName': None,
  'AutoPrepareEnviron