# Hyperparameter Tuning using HyperDrive


In [1]:
from azureml.core import Workspace, Experiment, Run
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import ScriptRunConfig, Environment
from azureml.widgets import RunDetails
from azureml.core.dataset import Dataset

In [2]:
from IPython.core.interactiveshell import InteractiveShell
# Notebook-wide display setting: echo the value of every top-level expression
# in a cell, not only the last one. Later cells rely on this to show results
# from bare expressions.
InteractiveShell.ast_node_interactivity = "all"

## Dataset


In [3]:
# Workspace name, kept for reference; the workspace itself is resolved from
# the local config file below rather than looked up by this name.
workspace = "MLworkspace"

# Connect to the workspace and open the experiment that will own the
# HyperDrive run.
ws = Workspace.from_config()
exp = Experiment(name="udacity-HyperTune-BreastCancer", workspace=ws)

# Print a one-line-per-field summary of the workspace we connected to.
workspace_summary = (
    ('Workspace name: ', ws.name),
    ('Azure region: ', ws.location),
    ('Subscription id: ', ws.subscription_id),
    ('Resource group: ', ws.resource_group),
)
print('\n'.join(label + value for label, value in workspace_summary))

Workspace name: MLworkspace
Azure region: eastus
Subscription id: 5831e312-31c6-42ba-b4a9-5c780e5ea1da
Resource group: MLAzureGroup


### Compute Target

In [4]:
# Reuse the named compute cluster when it already exists in the workspace;
# otherwise provision a new one (up to 4 nodes).
# NOTE(review): the variable is called `gpu_cluster`, but the cluster name and
# the requested VM size both look like CPU SKUs - confirm and rename if so.
clustername = 'StandardDS12CPU'
is_new_cluster = False

try:
    gpu_cluster = ComputeTarget(workspace=ws, name=clustername)
except ComputeTargetException:
    # Lookup failed: the cluster does not exist yet, so create it.
    print("Cluster not find - Creating cluster")
    is_new_cluster = True
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    gpu_cluster = ComputeTarget.create(ws, clustername, compute_config)
else:
    print("Find the existing cluster")

# Wait until the cluster is ready before anything is submitted to it.
gpu_cluster.wait_for_completion(show_output=True)

Find the existing cluster
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


### Setup Env

In [5]:
# Build the training environment from the conda specification that sits next
# to this notebook.
keras_env = Environment.from_conda_specification(
    name='keras-env',
    file_path='conda_dependencies.yml',
)

# Run the training script inside a Docker container. This only enables Docker;
# it does not select a base image.
# NOTE(review): recent azureml-core releases flag `docker.enabled` as
# deprecated in favour of DockerConfiguration(use_docker=True) - confirm
# against the SDK version in use.
keras_env.docker.enabled = True

### Get data from datastore

In [6]:
# Point at the training CSV in the workspace's default datastore and wrap it
# as a tabular dataset.
ds_tr = ws.get_default_datastore()
train_csv_path = ds_tr.path('cancerdata2/cancer_train_data.csv')
ds = Dataset.Tabular.from_delimited_files(path=train_csv_path)

In [7]:
# Sanity check: materialise the dataset as a pandas DataFrame and preview the
# first rows (columns and a few sample values).
ds.to_pandas_dataframe().head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,0,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,0,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,0,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,0,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


## Hyperdrive Configuration


In [8]:
from azureml.widgets import RunDetails
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import GridParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
from azureml.train.hyperdrive import choice

# Search space: each key is a command-line argument passed to the training
# script, each value the discrete set of candidates tried by the grid sampler.
# NOTE(review): this grid has 4 * 3 * 3 * 4 = 144 combinations while
# max_total_runs below is 50, so presumably only part of the grid is
# explored - confirm this is intended.
search_space = {
    "--max_depth": choice(3, 6, 12, 20),
    "--max_features": choice('auto', 'sqrt', 'log2'),
    "--min_samples_leaf": choice(1, 3, 5),
    "--n_estimator": choice(20, 40, 100, 1000),
}
ps = GridParameterSampling(search_space)

# Early-termination policy: Bandit with a 10% slack factor, checked at every
# interval once the first 5 intervals have passed.
policy = BanditPolicy(
    slack_factor=0.1,
    evaluation_interval=1,
    delay_evaluation=5,
)

# Run configuration for a single trial: scripts/train.py on the compute
# cluster, inside the environment defined earlier in the notebook.
estimator = ScriptRunConfig(
    source_directory='./scripts',
    script='train.py',
    compute_target=gpu_cluster,
    environment=keras_env,
)

# Tie the run configuration, sampler and policy together. The sweep maximises
# the "accuracy" metric, runs at most 50 trials (4 at a time) and is capped at
# 20 minutes overall.
hyperdrive_config = HyperDriveConfig(
    run_config=estimator,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name="accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=50,
    max_concurrent_runs=4,
    max_duration_minutes=20,
)

In [9]:
# Submit the HyperDrive configuration to the experiment. The bare expression
# on the last line displays the run summary (id, status, studio link).
hyperdrive_run = exp.submit(hyperdrive_config)
hyperdrive_run

Experiment,Id,Type,Status,Details Page,Docs Page
udacity-HyperTune-BreastCancer,HD_3fcc974c-6df7-4b16-927e-98b6398e07bf,hyperdrive,Running,Link to Azure Machine Learning studio,Link to Documentation


## Run Details


In [10]:
# Render the interactive HyperDrive widget to follow the child runs live.
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

## Best Model


In [11]:
import joblib

# Block until the HyperDrive sweep has finished before asking for the best run.
# Submitting the run and showing the widget both return immediately, so on a
# "Restart & Run All" this cell would otherwise query a sweep that is still in
# progress and get either None or an interim best run.
# raise_on_error=False: a failed or cancelled sweep is handled by the explicit
# "no best run" check below instead of an SDK exception. The result is
# assigned so the status dict is not echoed as cell output.
hyperdrive_status = hyperdrive_run.wait_for_completion(show_output=False,
                                                       raise_on_error=False)

# Pick the child run with the best value of the primary metric ("accuracy").
best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # No child run logged the primary metric, so there is nothing to register.
    raise Exception("No best run was found")
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
udacity-HyperTune-BreastCancer,HD_3fcc974c-6df7-4b16-927e-98b6398e07bf_2,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [12]:
# No explicit save is done here: the model file is expected to have been
# written by the training script (it is registered from the run's outputs
# further down) - TODO confirm against scripts/train.py.
print(best_run)

Run(Experiment: udacity-HyperTune-BreastCancer,
Id: HD_3fcc974c-6df7-4b16-927e-98b6398e07bf_2,
Type: azureml.scriptrun,
Status: Completed)


In [13]:
# Show everything the best run logged: the hyperparameter values it was
# launched with and the resulting scores.
print(best_run.get_metrics())

{'n_estimator:': 20, 'max_depth:': 12, 'max_features:': 'auto', 'min_samples_leaf:': 1, 'accuracy': 0.965034965034965, 'f1score': 0.9720670391061451}


In [16]:
# Inspect the run's properties (compute target type, snapshot id, log file
# locations).
best_run.properties


{'_azureml.ComputeTargetType': 'amlcompute',
 'ContentSnapshotId': 'faec31f1-502c-4fb9-83b5-1e588d96dd49',
 'ProcessInfoFile': 'azureml-logs/process_info.json',
 'ProcessStatusFile': 'azureml-logs/process_status.json'}

In [None]:
# Human-readable description for the tuned model.
# This cell previously tried to register the model through `automl_run` and
# `model_name`, neither of which is defined anywhere in this notebook, so it
# raised NameError on a fresh kernel. The HyperDrive model is registered from
# `best_run` in the following cell, so only the description is kept here.
description = 'Breast Cancer Hyper'

In [17]:
# Register the pickled model produced by the best run in the workspace model
# registry, then print its name, id and version separated by tabs.
model = best_run.register_model(
    model_name='breast_cancer_hyperdrive',
    model_path='outputs/cancer_model.pkl',
    tags=None,
)
print(model.name, model.id, model.version, sep='\t')

breast_cancer_hyperdrive	breast_cancer_hyperdrive:1	1
