## Configure Azure computing resources

In [1]:
from azureml.core import Workspace, Experiment

# Resolve the workspace via Workspace.from_config() and open the experiment
# that every later cell submits to.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="hyperdrive-experiment")

# Echo the workspace identity, one field per line, as a quick sanity check.
print('\n'.join([
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]))

# NOTE(review): this interactive run is not referenced or completed by any
# later cell visible in this notebook -- confirm whether it is still needed.
run = exp.start_logging()

Workspace name: quick-starts-ws-130665
Azure region: southcentralus
Subscription id: a7e82b92-bbed-4497-a8df-009b71b94f9c
Resource group: aml-quickstarts-130665


## Create or attach a compute cluster

In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute

# Create compute cluster
from azureml.core.compute_target import ComputeTargetException

# VM size used only when the cluster has to be provisioned below.
vm_size = "Standard_D12_V2"

# Name of the CPU cluster to look up (or create) in the workspace.
# Kept separate from `cpu_cluster` so that name always refers to a
# ComputeTarget object and never to a plain string.
cpu_cluster_name = "cpu-cluster"

# Reuse the cluster if the workspace already has one with this name;
# otherwise provision a new one with up to 5 nodes.
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                           max_nodes=5)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

# Block until the cluster reports completion, streaming progress to the cell output.
cpu_cluster.wait_for_completion(show_output=True)


Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Configure hyperdrive tuning parameters 

In [3]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import os
import shutil

# Random search space. Each key is the command-line flag handed to train.py
# for a sampled child run.
ps = RandomParameterSampling(
    {
        "--C": uniform(0.1, 1),
        "--max_iter": choice(50, 100, 150, 200),
    }
)

# Early-termination policy settings for the sweep.
policy = BanditPolicy(
    slack_factor=0.1,
    evaluation_interval=3,
    delay_evaluation=3,
)

# SKLearn estimator that runs train.py from the current directory on the cluster.
# NOTE(review): the SDK reports the SKLearn estimator as deprecated in favour of
# ScriptRunConfig; migrating would also require defining an environment.
est = SKLearn(
    entry_script='train.py',
    source_directory='.',
    compute_target=cpu_cluster,
)

# Tie estimator, sampler and policy together: maximise 'Accuracy' over at most
# 100 child runs, 4 of them running concurrently.
hyperdrive_config = HyperDriveConfig(
    estimator=est,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_concurrent_runs=4,
    max_total_runs=100,
)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [4]:
# Submit the HyperDrive sweep to the experiment and attach the monitoring widget.
hyperdrive_run = exp.submit(hyperdrive_config, show_output=True)
RunDetails(hyperdrive_run).show()

# Block until the sweep finishes. As the last expression of the cell, the value
# returned by wait_for_completion is what the notebook displays.
# (The former bare get_status() call was dropped: its result was discarded, so
# it only cost an extra service round trip.)
hyperdrive_run.wait_for_completion(show_output=True)



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_e817c069-59de-487a-9a53-0a56d086d05c
Web View: https://ml.azure.com/experiments/hyperdrive-experiment/runs/HD_e817c069-59de-487a-9a53-0a56d086d05c?wsid=/subscriptions/a7e82b92-bbed-4497-a8df-009b71b94f9c/resourcegroups/aml-quickstarts-130665/workspaces/quick-starts-ws-130665

Streaming azureml-logs/hyperdrive.txt

"<START>[2020-12-16T03:21:49.827828][API][INFO]Experiment created<END>\n"<START>[2020-12-16T03:21:50.9797492Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>"<START>[2020-12-16T03:21:52.718334][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2020-12-16T03:21:53.201084][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_e817c069-59de-487a-9a53-0a56d086d05c
Web View: https://ml.azure.com/experiments/hyperdrive-experiment/runs/HD_e817c069-59de-487a-9a53-0a56d086d05c?wsid=/subscri

{'runId': 'HD_e817c069-59de-487a-9a53-0a56d086d05c',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2020-12-16T03:21:49.565941Z',
 'endTimeUtc': '2020-12-16T04:03:13.926882Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'da9b937a-d4c8-45cd-b4c5-5b7b5f635035',
  'score': '0.7833333333333333',
  'best_child_run_id': 'HD_e817c069-59de-487a-9a53-0a56d086d05c_7',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg130665.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_e817c069-59de-487a-9a53-0a56d086d05c/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=8AC7aypgo6nIgv9qBNNWH%2BZLXTcStCvrYJGpuEApOLE%3D&st=2020-12-16T03%3A53%3A33Z&se=2020-12-16T12%3A03%3A33Z&sp=r'}}

## Retrieve and save the best model

In [5]:
import joblib
# Pick the child run with the best primary metric, then collect the metrics it
# logged and the command-line arguments it was launched with.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['arguments']

# Report the three facts in a fixed order, one print call per entry.
for label, value in (
    ('Best Run ID: ', best_run.id),
    ('\n Metrics: ', best_run_metrics),
    ('\n Parameters: ', parameter_values),
):
    print(label, value)

Best Run ID:  HD_e817c069-59de-487a-9a53-0a56d086d05c_7

 Metrics:  {'Regularization Strength:': 0.8341013024377794, 'Max iterations:': 200, 'Accuracy': 0.7833333333333333}

 Parameters:  ['--C', '0.8341013024377794', '--max_iter', '200']


In [6]:
# Display the best child run; the notebook renders it as a summary table.
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
hyperdrive-experiment,HD_e817c069-59de-487a-9a53-0a56d086d05c_7,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [7]:
# Show the id of the best child run.
best_run.id

'HD_e817c069-59de-487a-9a53-0a56d086d05c_7'

In [9]:
# Download the trained model from the best run into the working directory.
# The artifact name has no leading slash, matching the name the run reports in
# get_file_names() and the path used when registering the model.
best_run.download_file(name='outputs/model.joblib',
                       output_file_path='hyperdrive_model.joblib')

In [10]:
# List the files recorded for the best run to confirm that
# 'outputs/model.joblib' is among them.
best_run.get_file_names()

['azureml-logs/55_azureml-execution-tvmps_38ab81031ab16ed71cac848a493d1d948bc1f98651cf961dd57deb80c6ac4d77_d.txt',
 'azureml-logs/65_job_prep-tvmps_38ab81031ab16ed71cac848a493d1d948bc1f98651cf961dd57deb80c6ac4d77_d.txt',
 'azureml-logs/70_driver_log.txt',
 'azureml-logs/75_job_post-tvmps_38ab81031ab16ed71cac848a493d1d948bc1f98651cf961dd57deb80c6ac4d77_d.txt',
 'azureml-logs/process_info.json',
 'azureml-logs/process_status.json',
 'logs/azureml/103_azureml.log',
 'logs/azureml/job_prep_azureml.log',
 'logs/azureml/job_release_azureml.log',
 'outputs/model.joblib']

In [12]:
import joblib
# Load the model file downloaded from the best run. joblib.load unpickles the
# estimator, so only load files that come from a run you trust.
# NOTE(review): the estimator was pickled with a different scikit-learn version
# than the one loading it (see the warning emitted by this cell).
loaded_best_model = joblib.load('hyperdrive_model.joblib')

The sklearn.linear_model.logistic module is  deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.linear_model. Anything that cannot be imported from sklearn.linear_model is now part of the private API.
Trying to unpickle estimator LogisticRegression from version 0.20.3 when using version 0.22.2.post1. This might lead to breaking code or invalid results. Use at your own risk.


## Register the best model
Note: register the best model in the workspace before exiting the notebook. I have not yet figured out how to save the trained model; the download method shown earlier looks like it should work, but it is unclear how to reuse the downloaded file. Perhaps a pickle file could be used?

In [7]:
# register the best model 
#best_run.register_model(model_name = 'best_regression_model', model_path = 'outputs/model.joblib')

In [13]:
# Register the best run's model in the workspace. The accuracy tag is read from
# the run's logged 'Accuracy' metric instead of being typed in by hand, so it
# cannot drift from the model that is actually registered. It is stored as a
# string rounded to three decimals.
best_accuracy = best_run.get_metrics()['Accuracy']
best_model_registered = best_run.register_model(
    model_name='best_hyperdrive_model',
    model_path='outputs/model.joblib',
    tags={'accuracy': f'{best_accuracy:.3f}'},
    description='best hyperdrive training model',
)