# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
# import dependencies
from azureml.core import Workspace, Experiment
from azureml.core.dataset import Dataset
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import os

In [5]:
# Name of the experiment that groups every HyperDrive run from this notebook.
experiment_name = 'hydraulic-hyperdrive-experiment'

# Load the workspace from the local configuration and bind the experiment to it.
ws = Workspace.from_config()
experiment = Experiment(workspace=ws, name=experiment_name)

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code EZEQHXF3S to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.


In [6]:
# Load the training data.
# Reuse the dataset registered in the workspace under `key` when it is there;
# otherwise build it from the hosted CSV file and register it under that name.
# NOTE: update the key to match the dataset name
# Data Source : https://archive.ics.uci.edu/ml/datasets/Condition+monitoring+of+hydraulic+systems
key = "Hydraulic Systems Data"
description_text = "Condition monitoring of hydraulic systems Data Set"

found = key in ws.datasets
if found:
    dataset = ws.datasets[key]
else:
    # Create the tabular dataset from the CSV and register it in one step.
    example_data = 'https://raw.githubusercontent.com/chamsun-imoggo/udacityms-3rdproject/main/data/hydraulic_systems_training.csv'
    dataset = Dataset.Tabular.from_delimited_files(example_data).register(
        workspace=ws,
        name=key,
        description=description_text,
    )

# Materialize as a pandas DataFrame and show summary statistics as the cell output.
df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,PS1,PS2,PS3,PS4,PS5,PS6,FS1,FS2,TS1,TS2,TS3,TS4,P1,VS1,CE1,CP1,SE1,STABILITY
count,2094.0,2094.0,2094.0,2094.0,2094.0,2094.0,2094.0,2094.0,2094.0,2094.0,2094.0,2094.0,2094.0,2094.0,2094.0,2094.0,2094.0,2094.0
mean,160.461089,109.383521,1.74238,2.199789,9.122855,9.040759,6.179498,9.621939,45.94537,50.850958,48.14585,41.270196,2492.650573,0.616699,30.479281,1.789732,55.173831,0.360076
std,4.820589,5.116599,0.253804,4.012051,0.563162,0.537043,1.056177,0.444382,7.865514,7.275285,7.336484,7.971456,74.670327,0.059958,11.301936,0.273144,9.175962,0.480137
min,155.391547,104.406307,0.840252,0.0,8.365801,8.321527,2.018572,8.857513,35.384333,40.978767,38.340283,30.47055,2361.747267,0.524367,17.555983,1.06215,18.276617,0.0
25%,157.986015,106.913668,1.727014,0.0,8.535713,8.478621,6.37904,9.197996,36.309758,41.923621,39.204996,31.341463,2441.16795,0.560175,20.032158,1.545979,56.276721,0.0
50%,158.845991,107.607607,1.76636,0.0,9.066202,8.98583,6.579164,9.668634,45.556025,50.5001,47.76675,41.061425,2475.461567,0.612275,27.31445,1.7348,58.797425,0.0
75%,160.983205,109.415065,1.923863,1.266499,9.8353,9.720552,6.655778,10.146995,54.193537,58.723546,55.751542,49.492954,2547.338367,0.653071,46.610821,2.140912,59.687908,1.0
max,180.922708,131.589089,2.023398,10.182837,9.976781,9.856591,6.722707,10.403098,57.899283,61.958467,59.423167,53.060417,2740.641,0.839067,47.903667,2.8401,60.7553,1.0


In [7]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# NOTE: update the cluster name to match the existing cluster
# Choose a name for your CPU cluster
amlcompute_cluster_name = "compute-ml"

# Reuse the cluster when the workspace already has one with this name, and
# provision a new one only when the lookup reports that it is missing.
try:
    compute_target = ComputeTarget(ws, amlcompute_cluster_name)
    # Interpolate the variable that actually holds the cluster name; an undefined
    # name here would raise NameError and push an existing cluster into the
    # creation branch below.
    print(f"{amlcompute_cluster_name} exists already")
except ComputeTargetException:
    # Catch only the lookup failure so unrelated errors (authentication, typos,
    # network problems) surface instead of silently triggering provisioning.
    compute_config = AmlCompute.provisioning_configuration(vm_size="Standard_DS14_v2", max_nodes=10)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

# Wait for the cluster operation to finish, streaming its status to the cell output.
compute_target.wait_for_completion(show_output=True)


Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Hyperdrive Configuration

TODO: Explain the model you are using and the reasons for choosing the different hyperparameters, termination policy, and config settings.

In [8]:
# Early termination policy (not required when using Bayesian sampling).
early_termination_policy = BanditPolicy(
    slack_factor=0.1,
    evaluation_interval=1,
    delay_evaluation=5,
)

# Hyperparameters explored during training, keyed by the command-line flag
# passed to the training script: `--C` is drawn from uniform(0.001, 1.0) and
# `--max_iter` from a fixed list of choices.
search_space = {
    "--C": uniform(0.001, 1.0),
    "--max_iter": choice(100, 125, 150, 175, 200),
}
param_sampling = RandomParameterSampling(search_space)

# Estimator that runs train.py from the notebook directory on the compute target.
estimator = SKLearn(
    source_directory='./',
    compute_target=compute_target,
    entry_script='train.py',
)

# HyperDrive configuration: maximize the 'Accuracy' metric over at most 10 runs.
hyperdrive_run_config = HyperDriveConfig(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=10,
)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [9]:
# Submit the HyperDrive configuration to the experiment.
hyperdrive_run = experiment.submit(config=hyperdrive_run_config)

# Show the run-details widget, then block until the run finishes while
# streaming its logs; the returned run details become the cell output.
submitted_run_widget = RunDetails(hyperdrive_run)
submitted_run_widget.show()
hyperdrive_run.wait_for_completion(show_output=True)



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_ca8ddeb8-87c6-44a6-b2ba-d7fee5d19373
Web View: https://ml.azure.com/experiments/hydraulic-hyperdrive-experiment/runs/HD_ca8ddeb8-87c6-44a6-b2ba-d7fee5d19373?wsid=/subscriptions/610d6e37-4747-4a20-80eb-3aad70a55f43/resourcegroups/aml-quickstarts-140391/workspaces/quick-starts-ws-140391

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-03-14T05:12:57.744582][API][INFO]Experiment created<END>\n""<START>[2021-03-14T05:12:58.504167][GENERATOR][INFO]Trying to sample '10' jobs from the hyperparameter space<END>\n""<START>[2021-03-14T05:12:58.684758][GENERATOR][INFO]Successfully sampled '10' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-03-14T05:12:58.8837665Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_ca8ddeb8-87c6-44a6-b2ba-d7fee5d19373
Web View: https://ml.azure.com/experiments/hydraulic-hyperdrive-experiment/runs/HD_ca8ddeb8-87c6-44a6-b2ba-d7fe

{'runId': 'HD_ca8ddeb8-87c6-44a6-b2ba-d7fee5d19373',
 'target': 'compute-ml',
 'status': 'Completed',
 'startTimeUtc': '2021-03-14T05:12:57.536357Z',
 'endTimeUtc': '2021-03-14T05:19:44.533031Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '4b5e7d7b-c6bd-449b-b9ba-9e9392416f2f',
  'score': '0.9142857142857143',
  'best_child_run_id': 'HD_ca8ddeb8-87c6-44a6-b2ba-d7fee5d19373_0',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg140391.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_ca8ddeb8-87c6-44a6-b2ba-d7fee5d19373/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=ZdbMOCN1t17hEJ7lVf7t4gIFGwi%2FwS0cGUXO1qlW6io%3D&st=2021-03-14T05%3A09%3A55Z&se=2021-03-14T13%3A19%3A55Z&sp=r'},
 'submittedBy': 'ODL_User 140391'}

In [10]:
# Display the run-details widget for the HyperDrive run.
details_widget = RunDetails(hyperdrive_run)
details_widget.show()

# Wait for the run without streaming logs; the returned details are the cell output.
hyperdrive_run.wait_for_completion()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

{'runId': 'HD_ca8ddeb8-87c6-44a6-b2ba-d7fee5d19373',
 'target': 'compute-ml',
 'status': 'Completed',
 'startTimeUtc': '2021-03-14T05:12:57.536357Z',
 'endTimeUtc': '2021-03-14T05:19:44.533031Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '4b5e7d7b-c6bd-449b-b9ba-9e9392416f2f',
  'score': '0.9142857142857143',
  'best_child_run_id': 'HD_ca8ddeb8-87c6-44a6-b2ba-d7fee5d19373_0',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg140391.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_ca8ddeb8-87c6-44a6-b2ba-d7fee5d19373/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=ZdbMOCN1t17hEJ7lVf7t4gIFGwi%2FwS0cGUXO1qlW6io%3D&st=2021-03-14T05%3A09%3A55Z&se=2021-03-14T13%3A19%3A55Z&sp=r'},
 'submittedBy': 'ODL_User 140391'}

In [11]:
# Pick the child run that scored best on the primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
print(best_run)

# Arguments the best run was launched with (the sampled hyperparameters).
best_run_arguments = best_run.get_details()['runDefinition']['arguments']
print(best_run_arguments)

# Files recorded for the best run.
best_run_files = best_run.get_file_names()
print(best_run_files)

# Download the model file saved by the best run.
best_run.download_file(name='./outputs/model.joblib', output_file_path='./outputs')

Run(Experiment: hydraulic-hyperdrive-experiment,
Id: HD_ca8ddeb8-87c6-44a6-b2ba-d7fee5d19373_0,
Type: azureml.scriptrun,
Status: Completed)
['--C', '0.4503054825262272', '--max_iter', '200']
['azureml-logs/55_azureml-execution-tvmps_1ae8144e94ec9396cfbb30dcfcf93b8c31bbfa0a8a00dfd566499134fec559dd_d.txt', 'azureml-logs/65_job_prep-tvmps_1ae8144e94ec9396cfbb30dcfcf93b8c31bbfa0a8a00dfd566499134fec559dd_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_1ae8144e94ec9396cfbb30dcfcf93b8c31bbfa0a8a00dfd566499134fec559dd_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/124_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/model.joblib']


In [12]:
# Save the best model by registering the best run's model file in the workspace.
model = best_run.register_model(
    model_name='hyperdrive_best_run',
    model_path='outputs/model.joblib',
)