# Configuration Azure Machine Learning Service

In [1]:
# Check core SDK version number
import azureml.core

print("SDK version:", azureml.core.VERSION)

SDK version: 0.1.65


In [2]:
from azureml.core.workspace import Workspace

ws = Workspace.from_config()
print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

Found the config file in: C:\Users\Thijs\PycharmProjects\LEGO Vision\Notebooks\aml_config\config.json
Workspace name: LEGO-Vision
Azure region: westeurope
Subscription id: 2f5c43ac-ac34-401d-bead-2dc43ffdc4b1
Resource group: LEGO-Vision


In [3]:
from azureml.core.compute import ComputeTarget, BatchAiCompute
from azureml.core.compute_target import ComputeTargetException

# choose a name for your cluster
cluster_name = "LEGO-Visiongpu-3"

try:
    compute_target_three = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target.')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = BatchAiCompute.provisioning_configuration(vm_size='STANDARD_NC6', 
                                                                autoscale_enabled=True,
                                                                cluster_min_nodes=0, 
                                                                cluster_max_nodes=4)

    # create the cluster
    compute_target_three = ComputeTarget.create(ws, cluster_name, compute_config)

    compute_target_three.wait_for_completion(show_output=True)

    # Use the 'status' property to get a detailed status for the current cluster. 
    print(compute_target_three.status.serialize())

Found existing compute target.


In [4]:
ds = ws.get_default_datastore()
print(ds.datastore_type, ds.account_name, ds.container_name)

AzureFile legovision1529597820 azureml-filestore-f0102bff-6d0a-47ad-b50c-4709fd2f206a


In [5]:
ds.upload(src_dir=r'D:\LEGO Vision Datasets\classification-synthetic-data', target_path='lego-vision-classification-synthetic-data-white-background', overwrite=True, show_progress=True)

$AZUREML_DATAREFERENCE_78722bcbd92844379853f4731f1df55a

# Naive Bayes

## Natural Data

In [5]:
# ds_data_dir_syn = ds.path('lego-vision-classification')
ds_data_dir_nat = ds.path('lego-vision-classification-natural-data-white-background')

print(ds_data_dir_nat)

$AZUREML_DATAREFERENCE_116bc9565d63408c8de4e90fa28fb1c7


In [6]:
import os

project_folder = './nb-hog-dom-selfmade-hyperdrive'
os.makedirs(project_folder, exist_ok=True)

In [7]:
import shutil
shutil.copy('internal_hyperdrive_natural_data.py', project_folder)

'./nb-hog-dom-selfmade-hyperdrive\\internal_hyperdrive_natural_data.py'

In [8]:
shutil.copy('helpers.py', project_folder)

'./nb-hog-dom-selfmade-hyperdrive\\helpers.py'

In [9]:
from azureml.core import Experiment

experiment_name = 'nb-hog-dom-selfmade-hyperdrive'
experiment_nb_natural_data = Experiment(ws, name=experiment_name)

In [10]:
from azureml.train.estimator import Estimator

script_params = {
    '--data_dir': ds_data_dir_nat,
    '--output_dir': './outputs',
    '--classifier': 'multinomial',
    '--number_of_samples': 3200
}

estimator_natural_data = Estimator(source_directory=project_folder, 
                                  script_params=script_params,
                                  compute_target=compute_target_three,
                                  entry_script='internal_hyperdrive_natural_data.py',
                                  use_gpu=True,
                                  conda_packages=['scikit-learn', 'matplotlib', 'Pillow', 'scikit-image'])

In [11]:
run_natural_data = experiment_nb_natural_data.submit(estimator_natural_data)
run_natural_data

Experiment,Id,Type,Status,Details Page,Docs Page
nb-hog-dom-selfmade-hyperdrive,nb-hog-dom-selfmade-hyperdrive_1540569161633,azureml.scriptrun,Queued,Link to Azure Portal,Link to Documentation


In [28]:
from azureml.train.widgets import RunDetails
RunDetails(run).show()

_UserRun()

In [32]:
from azureml.train.hyperdrive import *


ps = GridParameterSampling(
    {
        '--number_of_samples': choice(200, 400, 800, 1600, 3200),
        '--shape': choice('32', '64', '128', '256'),
        '--color_insensitive': choice(0, 1)
    }
)

early_termination_policy = BanditPolicy(slack_factor = 0.3, evaluation_interval=1, delay_evaluation=1)

hyperdrive_run_config = HyperDriveRunConfig(estimator=estimator_natural_data,
                                            hyperparameter_sampling=ps,
                                            policy=early_termination_policy,
                                            primary_metric_name="f_score",
                                            primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                            max_total_runs=100,
                                            max_concurrent_runs=4)
hyperdrive_run_config

<azureml.train.hyperdrive.runconfig.HyperDriveRunConfig at 0x261deb0bc18>

In [33]:
hyperdrive_run = experiment_nb.submit(hyperdrive_run_config)

In [37]:
from azureml.train.widgets import RunDetails
RunDetails(hyperdrive_run).show()

_HyperDrive(widget_settings={'childWidgetDisplay': 'popup'})

In [None]:
hyperdrive_run.wait_for_completion(show_output=True)

RunId: nb-hog-dom-natural-data_1540389673260


Exception ignored in: <bound method ClientBase.__del__ of <azureml._restclient.run_history_client.RunHistoryClient object at 0x00000261DF086240>>
Traceback (most recent call last):
  File "c:\users\thijs\appdata\local\programs\python\python36\lib\site-packages\azureml\_restclient\clientbase.py", line 71, in __del__
    self._pool.shutdown()
  File "c:\users\thijs\appdata\local\programs\python\python36\lib\site-packages\azureml\_async\worker_pool.py", line 29, in shutdown
    super(WorkerPool, self).shutdown(*args, **kwargs)
  File "c:\users\thijs\appdata\local\programs\python\python36\lib\concurrent\futures\thread.py", line 152, in shutdown
    t.join()
  File "c:\users\thijs\appdata\local\programs\python\python36\lib\threading.py", line 1053, in join
    raise RuntimeError("cannot join current thread")
RuntimeError: cannot join current thread
Exception ignored in: <bound method ClientBase.__del__ of <azureml._restclient.run_history_client.RunHistoryClient object at 0x00000261D8BFD4A8>

## Syntetic and Natural Data

In [6]:
# ds_data_dir_syn = ds.path('lego-vision-classification')
ds_data_dir_nat = ds.path('lego-vision-classification-natural-data-white-background')

print(ds_data_dir_nat)

$AZUREML_DATAREFERENCE_19e2bd49d2524e34bf9976c1d6237b9d


In [7]:
# ds_data_dir_syn = ds.path('lego-vision-classification')
ds_data_dir_syn = ds.path('lego-vision-classification-synthetic-data-white-background')

print(ds_data_dir_syn)

$AZUREML_DATAREFERENCE_04f6004ecbf24f02852ebd7a22d19b93


In [9]:
import os

project_folder = './nb-hog-dom-synthetic-data'
os.makedirs(project_folder, exist_ok=True)

In [10]:
import shutil
shutil.copy('feature_extraction_experiment.py', project_folder)

'./nb-hog-dom-synthetic-data\\feature_extraction_experiment.py'

In [11]:
shutil.copy('helpers.py', project_folder)

'./nb-hog-dom-synthetic-data\\helpers.py'

In [12]:
from azureml.core import Experiment

experiment_name = 'nb-hog-dom-synthetic-data'
experiment_nb_synthetic_data = Experiment(ws, name=experiment_name)

In [19]:
from azureml.train.estimator import Estimator

script_params = {
    '--data_dir': ds_data_dir_syn,
    '--test_dir': ds_data_dir_nat,
    '--output_dir': './outputs',
    '--classifier': 'multinomial',
    '--number_of_samples': 5
}

estimator_synthetic = Estimator(source_directory=project_folder, 
                      script_params=script_params,
                      compute_target=compute_target_three,
                      entry_script='feature_extraction_experiment.py',
                      use_gpu=True,
                      conda_packages=['scikit-learn', 'matplotlib', 'Pillow', 'scikit-image'])

In [20]:
run_synthetic = experiment_nb_synthetic_data.submit(estimator_synthetic)

Experiment,Id,Type,Status,Details Page,Docs Page
nb-hog-dom-synthetic-data,nb-hog-dom-synthetic-data_1540466376579,azureml.scriptrun,Queued,Link to Azure Portal,Link to Documentation


In [21]:
from azureml.train.widgets import RunDetails
RunDetails(run_synthetic).show()

_UserRun()

In [26]:
from azureml.train.hyperdrive import *


ps = GridParameterSampling(
    {
        '--number_of_samples': choice(200, 400, 800, 1000),
        '--shape': choice('32', '64', '128', '256'),
        '--color_insensitive': choice(0, 1)
    }
)

early_termination_policy = BanditPolicy(slack_factor = 0.3, evaluation_interval=1, delay_evaluation=1)

hyperdrive_run_config = HyperDriveRunConfig(estimator=estimator_synthetic,
                                            hyperparameter_sampling=ps,
                                            policy=early_termination_policy,
                                            primary_metric_name="f_score",
                                            primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                            max_total_runs=100,
                                            max_concurrent_runs=4)
hyperdrive_run_config

<azureml.train.hyperdrive.runconfig.HyperDriveRunConfig at 0x1bc4005f240>

In [27]:
hyperdrive_run_syn = experiment_nb_synthetic_data.submit(hyperdrive_run_config)

The same input parameter(s) are specified in estimator script params and HyperDrive parameter space. HyperDrive parameter space definition will override duplicate entries in estimator. ['--number_of_samples'] is the list of overridden parameter(s).


In [29]:
from azureml.train.widgets import RunDetails
RunDetails(hyperdrive_run_syn).show()

_HyperDrive(widget_settings={'childWidgetDisplay': 'popup'})