# Hyperparameter Tuning using HyperDrive
Import libraries

In [None]:
import numpy as np 
import pandas as pd 
import azureml.core
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import os
import shutil
from azureml.core import Environment

# Report which azureml-core SDK release this notebook is running against
print(f"SDK version: {azureml.core.VERSION}")

SDK version: 1.20.0


## Initialize Workspace and Experiment

In [None]:
# Connect to the workspace described by the persisted configuration
ws = Workspace.from_config()

# Project folder path (not referenced by the other visible cells)
project_folder = './hyper_drive_proj'

# Experiment under which the HyperDrive runs are recorded
experiment_name = 'hyperd-heart-failure-exp'
experiment = Experiment(workspace=ws, name=experiment_name)

# Collect the workspace / experiment details and show them as a small table
output = {
    'Subscription ID': ws.subscription_id,
    'Workspace': ws.name,
    'Resource Group': ws.resource_group,
    'Location': ws.location,
    'Experiment Name': experiment.name,
}
# None disables column-width truncation; the old -1 sentinel has been
# deprecated since pandas 1.0 in favour of None.
pd.set_option('display.max_colwidth', None)
output_df = pd.DataFrame(data=output, index=[''])
# Transpose so each detail is rendered on its own row
output_df.T

Unnamed: 0,Unnamed: 1
Subscription ID,61c5c3f0-6dc7-4ed9-a7f3-c704b20e3b30
Workspace,quick-starts-ws-136379
Resource Group,aml-quickstarts-136379
Location,southcentralus
Experiment Name,hyperd-heart-failure-exp


## Dataset

The data is loaded into the workspace with the `TabularDatasetFactory` class from the Azure ML `azureml.data.dataset_factory` module. The code for this can be found in the `train.py` script.

## Create a compute target

Create a remote compute cluster in the Azure ML workspace, or reuse an existing one.

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = 'cpu-cluster'

try:
    # Reuse the compute target if one with this name already exists
    training_cluster = ComputeTarget(
        workspace=ws,
        name=cluster_name,
    )
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # Only a failed lookup should trigger provisioning. Any other error
    # (authentication, network, interrupt) is allowed to surface instead of
    # being mistaken for "cluster does not exist".
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS12_V2',
        max_nodes=4
    )
    # Provisioning failures propagate on purpose: swallowing them would leave
    # `training_cluster` undefined and break the cells that follow.
    training_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
    training_cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.


## Hyperdrive Configuration

In [None]:
# import libraries
from azureml.core import ScriptRunConfig, Environment

# Early termination policy for the sweep (not required when using Bayesian sampling).
early_termination_policy = BanditPolicy(
    slack_factor=0.1,
    evaluation_interval=3,
    delay_evaluation=3,
)

# Search space: each key is a command-line argument name and each value
# describes how that argument is sampled.
# NOTE(review): the `--max_iter` choices include 0 - confirm that train.py
# handles a zero-iteration fit as intended.
search_space = {
    '--C': uniform(0.001, 1.0),
    '--max_iter': choice(0, 10, 50, 100, 150, 200),
}
param_sampling = RandomParameterSampling(search_space)

# Folder that is later submitted as the run's source directory
train_script = "./outputs"

# exist_ok=True creates the folder only when it is missing, replacing the
# manual listdir() check-then-create sequence.
os.makedirs(train_script, exist_ok=True)

# Stage the training script inside the source directory
shutil.copy('train.py', train_script)


# Fetch the environment named 'AzureML-Tutorial' from the workspace
sklearn_env = Environment.get(name='AzureML-Tutorial', workspace=ws)

# Script run configuration: which script to run, from which folder,
# on which compute target and in which environment
src = ScriptRunConfig(
    script='train.py',
    source_directory=train_script,
    environment=sklearn_env,
    compute_target=training_cluster,
)

# HyperDrive configuration: sweep the sampled parameters over `src`,
# maximizing the 'Accuracy' metric
hyperdrive_run_config = HyperDriveConfig(
    run_config=src,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    max_concurrent_runs=3,
    max_total_runs=100,
)

In [None]:
# Launch the HyperDrive sweep under the experiment; it executes on the
# compute target set in the run configuration
hyperdrive_run = experiment.submit(config=hyperdrive_run_config)

## Run Details


In [None]:
# Show the run-monitoring widget for the HyperDrive run
run_widget = RunDetails(hyperdrive_run)
run_widget.show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [None]:
# Block until the HyperDrive run finishes, streaming its output while waiting;
# the returned run details become this cell's output
hyperdrive_run.wait_for_completion(
    show_output=True,
)

RunId: HD_00dec3a1-a761-4c7f-ba94-766ad523975b
Web View: https://ml.azure.com/experiments/hyperd-heart-failure-exp/runs/HD_00dec3a1-a761-4c7f-ba94-766ad523975b?wsid=/subscriptions/61c5c3f0-6dc7-4ed9-a7f3-c704b20e3b30/resourcegroups/aml-quickstarts-136379/workspaces/quick-starts-ws-136379

Execution Summary
RunId: HD_00dec3a1-a761-4c7f-ba94-766ad523975b
Web View: https://ml.azure.com/experiments/hyperd-heart-failure-exp/runs/HD_00dec3a1-a761-4c7f-ba94-766ad523975b?wsid=/subscriptions/61c5c3f0-6dc7-4ed9-a7f3-c704b20e3b30/resourcegroups/aml-quickstarts-136379/workspaces/quick-starts-ws-136379



{'runId': 'HD_00dec3a1-a761-4c7f-ba94-766ad523975b',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-01-28T14:12:17.779987Z',
 'endTimeUtc': '2021-01-28T15:11:53.505441Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'a38fe6be-cd4b-499b-9950-41f23d2cf22e',
  'score': '0.8133333333333334',
  'best_child_run_id': 'HD_00dec3a1-a761-4c7f-ba94-766ad523975b_2',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg136379.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_00dec3a1-a761-4c7f-ba94-766ad523975b/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=xy0HVcP1f4nRPMQUFsrJJdArYavcovRQ418Y0qOI%2BIc%3D&st=2021-01-28T15%3A02%3A42Z&se=2021-01-28T23%3A12%3A42Z&sp=r'},
 'submittedBy': 'ODL_User 136379'}

## Best Model

Get the best model from the hyperdrive experiments and display all the properties of the model.

In [None]:
# Verify the run is indeed complete before querying its best child run.
# The status is read once so a failure message can report what it actually was.
run_status = hyperdrive_run.get_status()
assert run_status == "Completed", f"HyperDrive run status is {run_status!r}, expected 'Completed'"

In [None]:
# Select the child run with the best primary metric and print its key values
best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # No best run is returned when no child run logged the primary metric;
    # fail with a clear message instead of an AttributeError further down.
    raise RuntimeError(
        "No child run reported the primary metric; cannot determine the best run."
    )

best_run_metrics = best_run.get_metrics()
# Arguments recorded in the best run's run definition
parameter_values = best_run.get_details()['runDefinition']['arguments']

print('Best Run Id: ', best_run.id)
print('\n Accuracy:', best_run_metrics['Accuracy'])
print('\n Regularization Strength:',best_run_metrics['Regularization Strength:'])
print('\n Max iterations:',best_run_metrics['Max iterations:'])

Best Run Id:  HD_00dec3a1-a761-4c7f-ba94-766ad523975b_2

 Accuracy: 0.8133333333333334

 Regularization Strength: 0.2604336976688295

 Max iterations: 50


In [None]:
# Display the best run object itself; the notebook renders it as a summary
# row (experiment, id, type, status and links), not just the run id
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
hyperd-heart-failure-exp,HD_00dec3a1-a761-4c7f-ba94-766ad523975b_2,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [None]:
# Download the best run's model file locally, then register it as a model

# Path of the model file inside the best run's artifacts
model_artifact = 'outputs/model.joblib'

# Explicit local file path, so the download target does not depend on how a
# bare directory argument is interpreted; make sure the folder exists first.
local_model_dir = './outputs'
os.makedirs(local_model_dir, exist_ok=True)
local_model_path = os.path.join(local_model_dir, 'model.joblib')

best_run.download_file(name=model_artifact, output_file_path=local_model_path)
model = best_run.register_model(model_name='hyperdrive_run', model_path=model_artifact)