In [1]:
from azureml.core import Workspace, Experiment
# Identifiers of the Azure ML workspace this notebook runs against.
WORKSPACE_NAME = 'quick-starts-ws-124493'
SUBSCRIPTION_ID = '374bdf1a-c648-4244-a317-f0d1ef4b85c7'
RESOURCE_GROUP = 'aml-quickstarts-124493'

# Resolve the workspace exactly once, by explicit identifiers, so this cell
# does not additionally require a local config.json whose result would be
# discarded anyway.
ws = Workspace.get(name=WORKSPACE_NAME,
                   subscription_id=SUBSCRIPTION_ID,
                   resource_group=RESOURCE_GROUP)

# Runs submitted later in the notebook are attached to this experiment.
experiment = Experiment(ws, 'myexperiment')

In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Reuse the compute cluster if it already exists, otherwise create it.
# The cluster *name* and the resulting ComputeTarget are kept in separate
# variables so that `cpu_cluster` is a ComputeTarget on both code paths.
cpu_cluster_name = "cpu-cluster"

try:
    # Raises ComputeTargetException when no target with this name exists.
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('cluster exist')
except ComputeTargetException:
    compute_gg = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                       max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_gg)

    # Only a freshly created cluster needs to be waited on.
    cpu_cluster.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

# Same object under a second name; defined regardless of which branch ran.
compute_target = cpu_cluster


Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [3]:
from azureml.train.hyperdrive import normal, uniform, choice
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.widgets import RunDetails
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
from azureml.train.sklearn import SKLearn
import os

# Discrete search space sampled at random for each child run. The keys are
# the command-line flags handed to the training script.
search_space = {
    '--C': choice(0.25, 0.5, 0.75, 1.0, 2),
    '--max_iter': choice(100, 150, 200, 250, 300),
}
ps = RandomParameterSampling(search_space)


In [4]:
# Early-termination policy for the sweep: a Bandit policy with a 0.1 slack
# factor, applied every 2 intervals after an initial delay of 5 intervals.
policy = BanditPolicy(evaluation_interval=2,
                      slack_factor=0.1,
                      delay_evaluation=5)

In [5]:
# Ensure the ./training directory exists. `exist_ok=True` makes the cell safe
# to re-run and avoids the check-then-create race of listing the directory
# first.
os.makedirs("./training", exist_ok=True)

In [6]:
# SKLearn estimator that runs the entry script from the current directory
# on the compute cluster.
est = SKLearn(
    source_directory='./',
    entry_script='train22.py',
    compute_target=cpu_cluster,
)

# HyperDrive sweep definition: combines the estimator, the parameter sampler
# and the early-termination policy, and maximizes the "Accuracy" metric over
# at most 25 runs, 4 of them concurrently.
hyperdrive_config = HyperDriveConfig(
    estimator=est,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=25,
    max_concurrent_runs=4,
)

In [7]:
# Launch the hyperparameter sweep under the experiment created earlier.
hyperdrive_run = experiment.submit(config=hyperdrive_config)



In [8]:
# Render the monitoring widget for the sweep, then block until the sweep
# finishes while streaming its logs. The value returned by the wait call is
# the cell's displayed output.
run_widget = RunDetails(hyperdrive_run)
run_widget.show()
hyperdrive_run.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_3d32e0f2-89da-4583-81cd-d97c4aab1f30
Web View: https://ml.azure.com/experiments/myexperiment/runs/HD_3d32e0f2-89da-4583-81cd-d97c4aab1f30?wsid=/subscriptions/374bdf1a-c648-4244-a317-f0d1ef4b85c7/resourcegroups/aml-quickstarts-124493/workspaces/quick-starts-ws-124493

Streaming azureml-logs/hyperdrive.txt

"<START>[2020-11-04T09:53:14.838264][API][INFO]Experiment created<END>\n"<START>[2020-11-04T09:53:16.1761944Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>"<START>[2020-11-04T09:53:17.974809][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2020-11-04T09:53:18.279464][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_3d32e0f2-89da-4583-81cd-d97c4aab1f30
Web View: https://ml.azure.com/experiments/myexperiment/runs/HD_3d32e0f2-89da-4583-81cd-d97c4aab1f30?wsid=/subscriptions/374bdf1a-c6

{'runId': 'HD_3d32e0f2-89da-4583-81cd-d97c4aab1f30',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2020-11-04T09:53:14.55621Z',
 'endTimeUtc': '2020-11-04T10:09:01.1159Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '3a7a3144-2e57-45b8-aa98-e97294d00785',
  'score': '0.9102680829539707',
  'best_child_run_id': 'HD_3d32e0f2-89da-4583-81cd-d97c4aab1f30_0',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg124493.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_3d32e0f2-89da-4583-81cd-d97c4aab1f30/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=7nLM%2Bg0MmqY2lnaxRXUCAgw0L1DOHMXBqXhoyV98eYI%3D&st=2020-11-04T09%3A59%3A12Z&se=2020-11-04T18%3A09%3A12Z&sp=r'}}

In [9]:
# Pick the child run with the best primary metric, fetch its metrics, and
# print the command-line arguments it was launched with.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
best_run_details = best_run.get_details()
parameter_values = best_run_details['runDefinition']['arguments']
print(parameter_values)

['--C', '0.75', '--max_iter', '250']


In [11]:
import joblib
# Show the best run object and refresh its logged metrics.
print(best_run)
best_run_metrics = best_run.get_metrics()

# Report the best run's id together with its "Accuracy" metric.
print("BEST RUN ID AND BEST RUN ACCURACY")
print(f"Best_run_id {best_run.id}")
print(f"Best_run_accuracy {best_run_metrics['Accuracy']}")

Run(Experiment: myexperiment,
Id: HD_3d32e0f2-89da-4583-81cd-d97c4aab1f30_0,
Type: azureml.scriptrun,
Status: Completed)
BEST RUN ID AND BEST RUN ACCURACY
Best_run_id HD_3d32e0f2-89da-4583-81cd-d97c4aab1f30_0
Best_run_accuracy 0.9102680829539707
