# Hyperparameter Tuning using HyperDrive

In [2]:
from azureml.core import Dataset, Workspace, Experiment, Environment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal


## Dataset

In [3]:
# Connect to the Azure ML workspace using the locally available workspace config.
ws = Workspace.from_config()

# Every HyperDrive run submitted from this notebook is grouped under this experiment.
experiment_name = 'hyperdrive_hr_analytics'
experiment = Experiment(workspace=ws, name=experiment_name)

# Look up the dataset registered in the workspace under the name "HR_Analytics".
data = ws.datasets["HR_Analytics"]

In [4]:
# Name of the AmlCompute cluster that executes the HyperDrive child runs.
cluster_name = "aml-compute-capstone"

try:
    # Reuse the cluster if it is already provisioned in the workspace.
    cluster_project = ComputeTarget(workspace=ws, name=cluster_name)
    print("Cluster exists")
except ComputeTargetException:
    # Only a failed compute-target lookup should trigger provisioning. Any other
    # failure (authentication, networking, KeyboardInterrupt, ...) must surface
    # instead of silently attempting to create a new cluster.
    config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4)
    cluster_project = ComputeTarget.create(ws, cluster_name, config)

# Block until the cluster has finished provisioning / is ready to accept runs.
cluster_project.wait_for_completion()

Cluster exists


## Hyperdrive Configuration

For this experiment, I chose logistic regression because:

* It is one of the simplest classification algorithms, and it is good practice to start with a simple baseline model
* Its results are easy to interpret, and it is simple to train
* It trains very quickly

The hyperparameters that were tuned are:

* The regularization parameter (`--C`) was sampled from the values 0.001, 0.01, 0.1 and 1 to control overfitting in the model.
* The maximum number of iterations (`--max_iter`) was sampled from the values 10, 25, 50 and 200.

In [5]:
# Early-termination policy: Bandit with a 10% slack factor, evaluated every
# 2 intervals, with evaluation delayed for the first 5 intervals.
early_termination_policy = BanditPolicy(
    slack_factor=0.1,
    evaluation_interval=2,
    delay_evaluation=5,
)

# Discrete search space, randomly sampled. The keys are passed to the entry
# script as command-line arguments.
# NOTE(review): 4 x 4 = 16 distinct combinations exist, while max_total_runs
# below is 20 - confirm whether repeated configurations are intended.
search_space = {
    "--C": choice(0.001, 0.01, 0.1, 1),
    "--max_iter": choice(10, 25, 50, 200),
}
param_sampling = RandomParameterSampling(search_space)

# Estimator that runs train.py from the current directory on the compute cluster.
# NOTE(review): the SDK reports 'SKLearn' as deprecated in favour of
# ScriptRunConfig; migrating requires choosing an environment that matches
# train.py's dependencies.
estimator = SKLearn(
    source_directory='.',
    entry_script='train.py',
    compute_target=cluster_project,
)

# HyperDrive maximizes the "Accuracy" metric, which train.py is expected to log
# - TODO confirm against train.py.
# NOTE(review): max_concurrent_runs=5 exceeds the max_nodes=4 used when this
# notebook provisions the cluster - confirm the intended concurrency.
hyperdrive_run_config = HyperDriveConfig(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,
    max_concurrent_runs=5,
)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [6]:

# Submit the HyperDrive configuration; the returned run object is the handle
# used below to monitor the sweep and fetch its best child run.
hyperdrive_run = experiment.submit(config=hyperdrive_run_config)



## Run Details


In [7]:
# RunDetails is already imported in the imports cell at the top of the notebook,
# so it is not re-imported here.
# Render the interactive widget that tracks the HyperDrive run and its child runs.
RunDetails(hyperdrive_run).show()

# Block until the sweep finishes, streaming its logs; the value returned by the
# call is displayed as the cell output.
hyperdrive_run.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_e613df82-5359-4a95-abb4-321ce2c1def9
Web View: https://ml.azure.com/experiments/hyperdrive_hr_analytics/runs/HD_e613df82-5359-4a95-abb4-321ce2c1def9?wsid=/subscriptions/d7f39349-a66b-446e-aba6-0053c2cf1c11/resourcegroups/aml-quickstarts-137947/workspaces/quick-starts-ws-137947

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-02-07T18:08:02.370194][API][INFO]Experiment created<END>\n""<START>[2021-02-07T18:08:02.915725][GENERATOR][INFO]Trying to sample '5' jobs from the hyperparameter space<END>\n""<START>[2021-02-07T18:08:03.262210][GENERATOR][INFO]Successfully sampled '5' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-02-07T18:08:04.5707609Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_e613df82-5359-4a95-abb4-321ce2c1def9
Web View: https://ml.azure.com/experiments/hyperdrive_hr_analytics/runs/HD_e613df82-5359-4a95-abb4-321ce2c1def9?wsid=/sub

{'runId': 'HD_e613df82-5359-4a95-abb4-321ce2c1def9',
 'target': 'aml-compute-capstone',
 'status': 'Completed',
 'startTimeUtc': '2021-02-07T18:08:02.054969Z',
 'endTimeUtc': '2021-02-07T18:20:56.129014Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '0067b1a9-c63a-4086-b988-0189af4dddf9',
  'score': '0.8426928281461434',
  'best_child_run_id': 'HD_e613df82-5359-4a95-abb4-321ce2c1def9_3',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg137947.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_e613df82-5359-4a95-abb4-321ce2c1def9/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=%2BOOG5Nbc96t%2BvT5LgCwI50IQFLXdJdMBAztCI2YBIyY%3D&st=2021-02-07T18%3A11%3A20Z&se=2021-02-08T02%3A21%3A20Z&sp=r'},
 'submittedBy': 'ODL_User 

## Best Model


In [8]:

# Select the child run with the best value of the primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # Fail with a clear message instead of an AttributeError further down.
    raise RuntimeError(
        "No child run reported the primary metric 'Accuracy'; cannot select a best run."
    )

best_run_metrics = best_run.get_metrics()
# Command-line arguments the best run was launched with; presumably these
# include the sampled --C / --max_iter values - verify against the run details.
parameter_values = best_run.get_details()['runDefinition']['arguments']

print('Best Run Id: ', best_run.id)
print('\n Accuracy:', best_run_metrics['Accuracy'])
# Report the winning hyperparameters as well; they were previously fetched but never shown.
print('\n Hyperparameters:', parameter_values)

Best Run Id:  HD_e613df82-5359-4a95-abb4-321ce2c1def9_3

 Accuracy: 0.8426928281461434


In [9]:
# Register the model file produced by the best run under the name "best_model";
# the returned Model object is displayed as the cell output.
best_run.register_model(
    model_name="best_model",
    model_path="outputs/hyperdrive_model.joblib",
)

Model(workspace=Workspace.create(name='quick-starts-ws-137947', subscription_id='d7f39349-a66b-446e-aba6-0053c2cf1c11', resource_group='aml-quickstarts-137947'), name=best_model, id=best_model:1, version=1, tags={}, properties={})