# Hyperparameter Tuning using HyperDrive

In [1]:
import os

import joblib
import pandas as pd
from azureml.core import Workspace, Experiment
from azureml.core import Environment, ScriptRunConfig
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.hyperdrive.parameter_expressions import choice
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.widgets import RunDetails

In [2]:
# Load the workspace from the local configuration and write that configuration
# back out under the .azureml folder.
ws = Workspace.from_config()
ws.write_config(path='.azureml')

# All HyperDrive runs in this notebook are grouped under this experiment.
experiment_name = 'capstone-gridstability-hd'
exp = Experiment(workspace=ws, name=experiment_name)

# One line per workspace attribute, same text as a multi-argument print with sep='\n'.
print('\n'.join([
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]))

# NOTE(review): this interactive run is not completed by any cell visible in this
# notebook -- confirm it is closed (run.complete()) or still needed.
run = exp.start_logging()

print(ws)

Workspace name: quick-starts-ws-187665
Azure region: southcentralus
Subscription id: d7f39349-a66b-446e-aba6-0053c2cf1c11
Resource group: aml-quickstarts-187665
Workspace.create(name='quick-starts-ws-187665', subscription_id='d7f39349-a66b-446e-aba6-0053c2cf1c11', resource_group='aml-quickstarts-187665')


In [3]:
cpu_cluster_name = "my-cluster"

# Reuse the cluster when the workspace already has one with this name.
# ComputeTarget(...) raises ComputeTargetException when the name is not found;
# that exception class must be imported (see the import cell), otherwise the
# except clause itself fails with NameError and the cluster is never created.
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print("Cluster already exists. Use it.")
except ComputeTargetException:
    # No such cluster yet: provision one with up to 4 Standard_D2_V2 nodes.
    compute_config = AmlCompute.provisioning_configuration(vm_size="Standard_D2_V2", max_nodes=4)
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

# Block until the (existing or newly requested) cluster reports completion.
compute_target.wait_for_completion(show_output=True)

Cluster already exists. Use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
# Fetch the named scikit-learn environment from the workspace; it is the
# environment the training script runs in.
env = Environment.get(
    workspace=ws,
    name="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu",
)

In [5]:
# Create the ./training folder if it is missing. exist_ok=True makes the cell
# safe to re-run and avoids the check-then-create race of testing os.listdir()
# first; it also raises if "training" exists but is not a directory instead of
# silently carrying on.
os.makedirs("./training", exist_ok=True)

In [6]:
# Run configuration for a single training job: train.py from the current
# directory, executed in `env`.
# The compute target is the object resolved (and waited on) in the cluster cell
# rather than a second hard-coded copy of the cluster name, so renaming the
# cluster in one place cannot leave this cell pointing at a stale name.
src = ScriptRunConfig(source_directory=".",
                      script="train.py",
                      compute_target=compute_target,
                      environment=env)

## Dataset

### Overview

The dataset "Electrical Grid Stability Simulated Data" was obtained from the UCI Machine Learning Repository and can be found [here](https://archive.ics.uci.edu/ml/datasets/Electrical+Grid+Stability+Simulated+Data+#).

The dataset consists of 10000 observations of an electric power grid with four nodes in total, one with electricity generation and three with electricity consumption:

![Diagram](/img/0-Diagram.png)

The dataset contains the following features (node 1 refers to the electricity producer, whereas nodes 2 to 4 refer to the electricity consumers):

    - tau: reaction time of each participant (1 to 4).
    - p: nominal power consumed or generated of each node (1 to 4). A negative value indicates a net consumption, whereas a positive value indicates net generation.
    - g: coefficient (gamma) proportional to price elasticity (1 to 4).
    - stab: the maximal real part of the characteristic equation root. A positive value indicates that the system is linearly unstable.
    - stabf: the stability label of the system. This is a categorical feature: stable/unstable.

The aim of this project is to develop a machine learning model that can predict the stability of the system based on the features of the dataset.
Therefore, it is a classification problem in which we want to predict the stabf value.

In [7]:
# Show the contents of the current working directory (the cell output lists
# train.py, the data folder and the training folder among others).
os.listdir()

['.amlignore',
 '.amlignore.amltmp',
 '.azureml',
 '.ipynb_aml_checkpoints',
 'automl.ipynb',
 'data',
 'endpoint.py',
 'hyperparameter_tuning.ipynb',
 'hyperparameter_tuning.ipynb.amltmp',
 'img',
 'outputs',
 'train.py',
 'training']

In [8]:
# Load the grid-stability CSV from the local data folder into a DataFrame.
# NOTE(review): `ds` is not referenced by any later cell visible in this
# notebook -- confirm whether this load is still needed.
ds = pd.read_csv("data/gridstability.csv")

## Hyperdrive Configuration

HyperDrive is used to tune the hyperparameters of a Logistic Regression model, namely the Regularization Strength (C) and the Maximum Number of Iterations (max_iter). Logistic Regression is a good choice for this classification problem because it establishes a simple baseline.

Random Parameter Sampling randomly selects hyperparameter combinations to evaluate, which keeps the tuning fast because not every combination in the search space has to be tried.

Bandit Policy is an early termination policy based on slack factor/slack amount and evaluation interval. Any run that doesn't fall within the slack factor or slack amount of the evaluation metric with respect to the best performing run will be terminated.

In this problem, we want to maximize the accuracy of the model.

In [10]:
# Specify parameter sampler
ps = RandomParameterSampling({"--C":choice(0.001,0.01,0.1,1,10,20,50,100,200,500,1000),
                             "--max_iter":choice(50,100,200,300)})

# Specify a Policy
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1, delay_evaluation=1)

In [11]:
# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hd_config = HyperDriveConfig(run_config=src,
                             hyperparameter_sampling=ps,
                             primary_metric_name="Accuracy",
                             primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                             policy=policy,
                             max_total_runs=16,
                             max_concurrent_runs=4)

## Run Details

In [12]:
# Submit the hyperdrive run to the experiment and show run details with the widget.
hd_run = exp.submit(hd_config)

# The widget RunDetails can be used to monitor the run
RunDetails(hd_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [13]:
# Block until the HyperDrive run completes, streaming its log output into the cell.
hd_run.wait_for_completion(show_output=True)

RunId: HD_c326a62b-5215-46af-8481-70551b24c57c
Web View: https://ml.azure.com/runs/HD_c326a62b-5215-46af-8481-70551b24c57c?wsid=/subscriptions/d7f39349-a66b-446e-aba6-0053c2cf1c11/resourcegroups/aml-quickstarts-187665/workspaces/quick-starts-ws-187665&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Streaming azureml-logs/hyperdrive.txt

"<START>[2022-02-28T09:35:24.310704][API][INFO]Experiment created<END>\n""<START>[2022-02-28T09:35:25.387050][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2022-02-28T09:35:26.010733][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_c326a62b-5215-46af-8481-70551b24c57c
Web View: https://ml.azure.com/runs/HD_c326a62b-5215-46af-8481-70551b24c57c?wsid=/subscriptions/d7f39349-a66b-446e-aba6-0053c2cf1c11/resourcegroups/aml-quickstarts-187665/workspaces/quick-starts-ws-187665&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254



{'runId': 'HD_c326a62b-5215-46af-8481-70551b24c57c',
 'target': 'my-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-02-28T09:35:24.045163Z',
 'endTimeUtc': '2022-02-28T09:45:16.962265Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '73071aa3-cec4-4237-8274-1dfdc5751679',
  'user_agent': 'python/3.8.1 (Linux-5.4.0-1068-azure-x86_64-with-glibc2.10) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.38.0',
  'space_size': '44',
  'score': '0.817',
  'best_child_run_id': 'HD_c326a62b-5215-46af-8481-70551b24c57c_0',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg187665.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_c326a62b-5215-46af-8481-70551b24c57c/azureml-logs/hyperdrive.txt?sv=

In [14]:
# Summaries of the child runs ordered by the primary metric; top=0 asks for
# all of them rather than a fixed number.
all_children = hd_run.get_children_sorted_by_primary_metric(
    top=0,
    reverse=False,
    discard_no_metric=False,
)
print(all_children)

[{'run_id': 'HD_c326a62b-5215-46af-8481-70551b24c57c_13', 'hyperparameters': '{"--C": 1, "--max_iter": 50}', 'best_primary_metric': 0.817, 'status': 'Completed'}, {'run_id': 'HD_c326a62b-5215-46af-8481-70551b24c57c_12', 'hyperparameters': '{"--C": 20, "--max_iter": 50}', 'best_primary_metric': 0.817, 'status': 'Completed'}, {'run_id': 'HD_c326a62b-5215-46af-8481-70551b24c57c_0', 'hyperparameters': '{"--C": 500, "--max_iter": 50}', 'best_primary_metric': 0.817, 'status': 'Completed'}, {'run_id': 'HD_c326a62b-5215-46af-8481-70551b24c57c_9', 'hyperparameters': '{"--C": 1000, "--max_iter": 50}', 'best_primary_metric': 0.8165, 'status': 'Completed'}, {'run_id': 'HD_c326a62b-5215-46af-8481-70551b24c57c_14', 'hyperparameters': '{"--C": 1000, "--max_iter": 300}', 'best_primary_metric': 0.816, 'status': 'Completed'}, {'run_id': 'HD_c326a62b-5215-46af-8481-70551b24c57c_15', 'hyperparameters': '{"--C": 20, "--max_iter": 300}', 'best_primary_metric': 0.816, 'status': 'Completed'}, {'run_id': 'HD_c

## Best Model

In [15]:
# Only the first entry of the ranking by primary metric.
top_child = hd_run.get_children_sorted_by_primary_metric(
    top=1,
    reverse=False,
    discard_no_metric=False,
)
print(top_child)

[{'run_id': 'HD_c326a62b-5215-46af-8481-70551b24c57c_13', 'hyperparameters': '{"--C": 1, "--max_iter": 50}', 'best_primary_metric': 0.817, 'status': 'Completed'}]


In [16]:
# Pick the child run HyperDrive reports as best for the primary metric.
# NOTE(review): in the recorded outputs three child runs tie at Accuracy 0.817,
# and the run returned here (suffix _0) is not the first entry of the sorted
# listing (suffix _13) -- confirm which hyperparameters are intended.
best_run = hd_run.get_best_run_by_primary_metric()
# List the files stored with that run (the output includes outputs/model.pkl).
best_run.get_file_names()

['outputs/model.pkl',
 'system_logs/cs_capability/cs-capability.log',
 'system_logs/hosttools_capability/hosttools-capability.log',
 'system_logs/lifecycler/execution-wrapper.log',
 'system_logs/lifecycler/lifecycler.log',
 'system_logs/lifecycler/vm-bootstrapper.log',
 'user_logs/std_log.txt']

In [17]:
# Metrics logged by the best child run (hyperparameter values and Accuracy).
best_run.get_metrics()

{'Regularization Strength:': 500.0, 'Max iterations:': 50, 'Accuracy': 0.817}

In [18]:
# Full run record of the best child run: status, timings, target and the
# run definition including the arguments passed to train.py.
best_run.get_details()

{'runId': 'HD_c326a62b-5215-46af-8481-70551b24c57c_0',
 'target': 'my-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-02-28T09:37:54.921606Z',
 'endTimeUtc': '2022-02-28T09:37:58.613509Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': '73071aa3-cec4-4237-8274-1dfdc5751679',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'train.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--C', '500', '--max_iter', '50'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'my-cluster',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'datacaches': [],
  'jobName': None,
  'maxRunDurationSeconds': 2592000,
  'nodeCount': 1,
  'instanceTypes': [],
  'priority': None,
  'credentialPassthrough': False,
  'identity': None,
  'e

In [19]:
# Register the best run's outputs/model.pkl file in the workspace under the
# model name 'best_model_hd'.
best_run.register_model(model_name = 'best_model_hd', model_path='outputs/model.pkl')

Model(workspace=Workspace.create(name='quick-starts-ws-187665', subscription_id='d7f39349-a66b-446e-aba6-0053c2cf1c11', resource_group='aml-quickstarts-187665'), name=best_model_hd, id=best_model_hd:1, version=1, tags={}, properties={})