## **Import statements**

In [2]:
%reload_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
import os


## **Configure the Azure ML Compute and Storage**

In [3]:
# Import Azure ML API SDK. The SDK is installed implicitly with the latest
# version of the CLI in your default python environment
import azureml.core
from azureml.core import Workspace

print("You are currently using version", azureml.core.VERSION, "of the Azure ML SDK")

# If everything is in place just load the workspace from its config file.
# The location can be overridden with the AML_CONFIG_PATH environment variable
# so the notebook is not tied to one user's home directory; the default is the
# path used so far (currently Lukasz' workspace).
aml_config_path = os.environ.get(
    'AML_CONFIG_PATH',
    '/home/lukasz/notebooks/azureml/python/aml_config/config.json')
ws = Workspace.from_config(path=aml_config_path)

# Set up the local data folder (created next to the notebook if missing)
data_folder = os.path.join(os.getcwd(), 'data')
os.makedirs(data_folder, exist_ok=True)

print ("Created ", data_folder)
#TODO: copy input files to this folder

# Script folder; it is passed as the Estimator's source_directory further down.
# NOTE: this folder is not created here.
source_folder = os.path.join(os.getcwd(), 'scripts')
#copy utils and all files to this folder

You are currently using version 1.0.10 of the Azure ML SDK
Found the config file in: /home/lukasz/notebooks/azureml/python/aml_config/config.json
Created  /home/cerndev/common/azure/data


In [4]:
# get azure storage key
#Assuming there is a single line with a key  
def get_storage_key(file):
    """Return the storage key stored on the first line of ``file``.

    The file is assumed to contain the key on a single line. The trailing
    line ending is stripped so the returned value is the bare key.

    Args:
        file: Path of the text file that contains the key.

    Returns:
        The first line of the file without its trailing newline.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is empty.
    """
    # Python 3 refuses unbuffered (buffering=0) text-mode files, so use the
    # default buffering; the context manager guarantees the handle is closed.
    with open(file, 'r') as key_file:
        first_line = key_file.readline()
    if not first_line:
        raise ValueError("Storage key file is empty: {}".format(file))
    return first_line.rstrip('\r\n')
    

In [5]:
# Print the name and type of every datastore registered in the current workspace
datastores = ws.datastores
for name in datastores:
    ds = datastores[name]
    print(name, ds.datastore_type)

#print(ds.datastore_type, ds.account_name, ds.container_name)



workspaceblobstore AzureBlob
workspacefilestore AzureFile
ds_weights AzureBlob
ds_tracks AzureBlob
ds_rphi AzureBlob


In [6]:
from azureml.core import Workspace, Datastore

# Fetch the datastores registered as ds_weights, ds_tracks and ds_rphi from
# the current workspace, in that order.
ds_weights, ds_tracks, ds_rphi = [
    Datastore.get(ws, datastore_name=registered_name)
    for registered_name in ('ds_weights', 'ds_tracks', 'ds_rphi')
]

## **Set Compute Resources in Azure**

The following options are available:
* **Run-based creation** creates a compute target at runtime. The compute is automatically created for your run. The cluster scales up to the number of max_nodes that you specify in your run config. The compute is deleted automatically once the run completes.

* **Persistent Compute**. A persistent Azure Machine Learning Compute can be reused across jobs. The compute can be shared with other users in the workspace and is kept between jobs.

* **Remote Virtual Machines**. An arbitrary remote VM, as long as it's accessible from Azure Machine Learning service.

* **Azure Batch**. Azure Batch is used to run large-scale parallel and high-performance computing (HPC) applications efficiently in the cloud. AzureBatchStep can be used in an Azure Machine Learning Pipeline to submit jobs to an Azure Batch pool of machines. 

See https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets#vm for more details.

**Select Option #4 (Data Science VM) below — it is the option currently being tested.**

#### Option #1. Use System-managed environment 
* add package dependencies using CondaDependency class
* Conda will create conda_dependencies.yml in aml_config directory in the workspace.

Not tested.


In [1]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

# Start from a default run configuration and attach the conda packages the
# run environment should provide.
run_system_managed_temp = RunConfiguration()
run_system_managed_temp.environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=[
        'pytorch',
        'scipy',
        'numpy',
        'scikit-learn',
        'tqdm',
        'cython',
        'torchvision',
    ]
)

#### Option #2.  Create a compute cluster
* define the cluster configuration directly in the code  
* define the autoscaling rules

Not working for now because of the many dependencies that must be met (NVIDIA runtime, deep learning frameworks).
It can be tested later.


In [3]:

from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# choose a name for your cluster
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
# Environment variables are always strings while the defaults are ints, so
# convert explicitly to get an int node count in both cases.
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 2))

# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")


if compute_name in ws.compute_targets:
    # Reuse the compute target that is already registered under this name.
    compute_target = ws.compute_targets[compute_name]
    if isinstance(compute_target, AmlCompute):
        print('found compute target. just use it. ' + compute_name)
    else:
        # Previously this case was accepted silently; make it visible.
        print('WARNING: compute target ' + compute_name +
              ' exists but is not an AmlCompute: ' + type(compute_target).__name__)
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                min_nodes=compute_min_nodes,
                                                                max_nodes=compute_max_nodes)

    # create the cluster
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

NameError: name 'ws' is not defined

#### Option #3. Attach to the existing VM
* define the VM in advance and connect to it

Not working for now. Problem with SSH. To be investigated.


In [12]:
import getpass
import os

from azureml.core.compute import RemoteCompute, ComputeTarget

# Create the compute config
compute_target_name = "datasciencelmvm"
vm_address = "datasciencelmvm.northeurope.cloudapp.azure.com"
vm_username = 'azureuser'

# Do not keep the VM password in the notebook: read it from the
# AML_REMOTE_VM_PASSWORD environment variable, or prompt for it when unset.
vm_password = os.environ.get("AML_REMOTE_VM_PASSWORD") or getpass.getpass(
    "Password for {}@{}: ".format(vm_username, vm_address))

attach_config = RemoteCompute.attach_configuration(address=vm_address,
                                                   ssh_port=22,
                                                   username=vm_username,
                                                   password=vm_password)

#Problem with SSH. VM does not exist, still the code tries to attach.

# If you authenticate with SSH keys instead, use this code:
#                                                  ssh_port=22,
#                                                  username='<username>',
#                                                  password=None,
#                                                  private_key_file="<path-to-file>",
#                                                  private_key_passphrase="<passphrase>")

# Attach the compute
compute_target = ComputeTarget.attach(ws, compute_target_name, attach_config)

compute_target.wait_for_completion(show_output=True)

Creating.
SucceededProvisioning operation finished, operation "Succeeded"


#### Option #4.  Create a Data Science VM 
* VM has many dependencies as a part of the OS image

Testing now.  


In [7]:
from azureml.core.compute import DsvmCompute
from azureml.core.compute_target import ComputeTargetException
import time

dsvm_name = 'dsvmgpu'
# Grace period after provisioning before ssh is used on the new VM.
ssh_wait_seconds = 90

try:
    dsvm_compute = DsvmCompute(ws, dsvm_name)
    print('Found an existing DSVM.')
except ComputeTargetException:
    # Only the SDK's compute-target lookup error should trigger provisioning.
    # A bare `except:` would also start a new VM on unrelated failures
    # (e.g. `ws` undefined, KeyboardInterrupt); those now propagate.
    print('Creating a new DSVM.')
    dsvm_config = DsvmCompute.provisioning_configuration(vm_size="Standard_NC6") # Standard_D2s_v3
    dsvm_compute = DsvmCompute.create(ws, name=dsvm_name, provisioning_configuration=dsvm_config)
    dsvm_compute.wait_for_completion(show_output=True)
    # The message now reports the actual wait (it used to say "one minute"
    # while sleeping 90 seconds).
    print("Waiting {} seconds for ssh to be accessible".format(ssh_wait_seconds))
    time.sleep(ssh_wait_seconds) # Wait for ssh to be accessible

compute_target = dsvm_compute

Creating a new DSVM.

        DEPRECATED
        This class will be deprecated soon and we will remove support for it in an upcoming release.
        Please use the "AmlCompute" class instead, or spin up a VM in Azure and attach it using RemoteCompute().
        Use !help AmlCompute to learn more.
        
Creating.............................
SucceededProvisioning operation finished, operation "Succeeded"
Waiting one minute for ssh to be accessible


## Execution Control 

#### Option 1: Execute with Run Configurations (skip for now).
* execute once the compute resource is defined
* Select between Options 1-4
* Define conda and pip dependencies






#### Option 2: Define Estimators
* execute once the compute resource is defined
* Select between Options 1-4
* Define conda and pip dependencies





In [None]:
from azureml.core import Workspace, Experiment, Run
from azureml.train.estimator import Estimator

# For reference, the original script is invoked as:
#   dummy-tracking.py --path-template='/storage/data/lhcb/rphi-padded-per-module/rphi_eval_module_{}.dat'
#       --dir-weights=/storage/data/lhcb/weights/
#       --truth-tracks-path='/storage/data/lhcb/tracker_data/FullTrackingTruthTracks.npy'
# Below, the storage locations are supplied via as_download() on the
# corresponding datastores and the file pattern is passed separately.
script_params = {
    '--path-template': ds_rphi.as_download(),
    '--template-pattern': 'rphi_eval_module_{}.dat',
    '--dir-weights': ds_weights.as_download(),
    '--truth-tracks-path': ds_tracks.as_download(),
}

# compute_target is whichever target was set up in the
# "Set Compute Resources in Azure" section above.
# my_compute_target would be a default compute from AzureML (not tested).
est = Estimator(
    source_directory=source_folder,
    entry_script='dummy-tracker.py',
    script_params=script_params,
    compute_target=compute_target,
    conda_packages=[
        'pytorch',
        'scipy',
        'numpy',
        'scikit-learn',
        'tqdm',
        'cython',
        'torchvision',
        'cudatoolkit=9.0',
        'matplotlib',
    ],
    pip_packages=['pynvrtc'],
)

# Known problems seen so far:
#   ImportError: .../torch/lib/libtorch.so.1: undefined symbol: nvrtcGetProgramLogSize
#   Testing the pytorch-cpu version instead -> problem:
#   ResolvePackageNotFound: torchvision-cpu, pytorch-cpu
#   Alternative: add cudatoolkit

# Create the experiment
experiment = Experiment(workspace=ws, name="KDTree-GPU")
#my_compute_target = run_system_managed_temp.amlcompute

# Submit the estimator and stream the logs until the run finishes
run = experiment.submit(est)
run.wait_for_completion(show_output=True)

#run_system_managed_temp = experiment.start_logging()

RunId: KDTree-GPU_1553497474_ad48129c

Streaming azureml-logs/60_control_log.txt

Streaming log file azureml-logs/60_control_log.txt
Running: ['/bin/bash', '/tmp/azureml_runs/KDTree-GPU_1553497474_ad48129c/azureml-setup/docker_env_checker.sh']

Materialized image not found on target: azureml/azureml_dfe42988f99b80518fa6dde5551e811c


Logging experiment preparation status in history service.
Running: ['/bin/bash', '/tmp/azureml_runs/KDTree-GPU_1553497474_ad48129c/azureml-setup/docker_env_builder.sh']
Running: ['sudo', 'docker', 'build', '-f', 'azureml-setup/Dockerfile', '-t', 'azureml/azureml_dfe42988f99b80518fa6dde5551e811c', '.']
Sending build context to Docker daemon  317.4kB
Step 1/15 : FROM mcr.microsoft.com/azureml/base:0.2.1@sha256:efeb2cde547e536644b69a2708050e244d2df4b7e0fd84351af8bf7bafdf5485
sha256:efeb2cde547e536644b69a2708050e244d2df4b7e0fd84351af8bf7bafdf5485: Pulling from azureml/base
7b722c1070cd: Pulling fs layer
5fbf74db61f1: Pulling fs layer
ed41cb72e5c9: Pulling fs l

Collecting applicationinsights>=0.11.7 (from azureml-defaults->-r /azureml-setup/condaenv.8wem2ibe.requirements.txt (line 1))
  Downloading https://files.pythonhosted.org/packages/de/bc/8e738cc3b74551c1a63889ff32c4456c22246ec89cfae3bf6a0a126a29c8/applicationinsights-0.11.8-py2.py3-none-any.whl (58kB)
Collecting requests>=2.19.1 (from azureml-core==1.0.18.*->azureml-defaults->-r /azureml-setup/condaenv.8wem2ibe.requirements.txt (line 1))
  Downloading https://files.pythonhosted.org/packages/7d/e3/20f3d364d6c8e5d2353c72a67778eb189176f08e873c9900e10c0287b84b/requests-2.21.0-py2.py3-none-any.whl (57kB)
Collecting urllib3>=1.23 (from azureml-core==1.0.18.*->azureml-defaults->-r /azureml-setup/condaenv.8wem2ibe.requirements.txt (line 1))
  Downloading https://files.pythonhosted.org/packages/62/00/ee1d7de624db8ba7090d1226aebefab96a2c71cd5cfa7629d6ad3f61b79e/urllib3-1.24.1-py2.py3-none-any.whl (118kB)
Collecting azure-mgmt-keyvault>=0.40.0 (from azureml-core==1.0.18.*->azureml-defaults->-r /az

  Downloading https://files.pythonhosted.org/packages/b3/c2/af4b47845f27dc7d206ed4908b9e580f8bc94a4b2f3956a0d87c40719d90/azure_mgmt_nspkg-3.0.2-py3-none-any.whl
Collecting isodate>=0.6.0 (from msrest>=0.5.1->azureml-core==1.0.18.*->azureml-defaults->-r /azureml-setup/condaenv.8wem2ibe.requirements.txt (line 1))
  Downloading https://files.pythonhosted.org/packages/9b/9f/b36f7774ff5ea8e428fdcfc4bb332c39ee5b9362ddd3d40d9516a55221b2/isodate-0.6.0-py2.py3-none-any.whl (45kB)
Collecting requests-oauthlib>=0.5.0 (from msrest>=0.5.1->azureml-core==1.0.18.*->azureml-defaults->-r /azureml-setup/condaenv.8wem2ibe.requirements.txt (line 1))
  Downloading https://files.pythonhosted.org/packages/c2/e2/9fd03d55ffb70fe51f587f20bcf407a6927eb121de86928b34d162f0b1ac/requests_oauthlib-1.2.0-py2.py3-none-any.whl
Collecting docker-pycreds>=0.4.0 (from docker->azureml-core==1.0.18.*->azureml-defaults->-r /azureml-setup/condaenv.8wem2ibe.requirements.txt (line 1))
  Downloading https://files.pythonhosted.org

In [None]:
# Display the run object returned by experiment.submit() in the cell above.
run
# Unused example of logging a value on the run:
#run.log(name, value, description='')

# Error noted for this run (same undefined-symbol problem as recorded in the
# Estimator cell):
# ImportError: /azureml-envs/azureml_aa3800697c3da90356674c5d162cf01e/lib/python3.6/site-packages/torch/lib/libtorch.so.1: undefined symbol: nvrtcGetProgramLogSize
