# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [15]:
import ames # The module for loading external data - Ames Housing dataset
import os
import pandas as pd
import numpy as np
import json
import ast
import pickle

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import Workspace, Dataset, Experiment, Model, Environment, ScriptRunConfig
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.widgets import RunDetails

from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, loguniform, choice

In [2]:
# Connect to the Azure ML workspace described by the local config file and
# print its identifying attributes, one per line.
ws = Workspace.from_config()
for workspace_attribute in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_attribute)

quick-starts-ws-154042
aml-quickstarts-154042
southcentralus
81cefad3-d2c9-4f77-a466-99a7f541c7bb


In [3]:
# Create (or reuse) the compute cluster that runs the training jobs.
cpu_cluster_name = "cpu-cluster"

try:
    # Looking the target up by name raises ComputeTargetException when it is absent.
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    # No such cluster in this workspace yet: provision a new one.
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, provisioning_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster reports a final provisioning state.
cpu_cluster.wait_for_completion(show_output=True)

InProgress.....
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Dataset

TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [4]:
# Reuse the dataset registered in the workspace when it exists; otherwise
# load it through the `ames` helper module and register the training part.
ds_key = 'Ames-housing-dataset'
ds_desc = 'Ames Housing training data.'

if ds_key in ws.datasets:
    dataset = ws.datasets[ds_key]
    print(f'Found registered {ds_key}, use it.')
else:
    train, test = ames.load_data_clean()
    print(f"train.shape = {train.shape}, test.shape = {test.shape}")
    # Upload the training frame to the default datastore and register it
    # under `ds_key` so that later sessions and remote runs can look it up.
    blob = ws.get_default_datastore()
    dataset = TabularDatasetFactory.register_pandas_dataframe(
        train, blob, name=ds_key, description=ds_desc
    )

train.shape = (1460, 80), test.shape = (1459, 79)
Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/a0f98968-2133-4d47-9c9e-247029ea8b8e/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


Method register_pandas_dataframe: This is an experimental method, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


In [5]:
%%writefile train_xgb.py
"""Train, evaluate and log metrics for selected ML algorithm 
in the Azure workspace context."""

import argparse
import os
import numpy as np
import pandas as pd
import joblib
import ames

from azureml.core.run import Run
from azureml.core import Workspace

from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score


ws = Workspace.from_config()
ds_key = 'Ames-housing-dataset'
dataset = ws.datasets[ds_key]

train = dataset.to_pandas_dataframe()

X_train, X_test = train_test_split(ames.label_encode(ames.encode_dtypes(train)))
y_train = X_train.pop('SalePrice')
y_test = X_test.pop('SalePrice')

print(f"X_train.shape = {X_train.shape}, X_test.shape = {X_test.shape}")

run = Run.get_context()

parser = argparse.ArgumentParser()

parser.add_argument('--learning_rate', type=float, default=0.1,
                   help='Step size shrinkage used in update to prevent overfiffing')

parser.add_argument('--gamma', type=float, default=2,
                   help='Minimum loss reduction required to make a further partition on a leaf node of the tree')

parser.add_argument('--max_depth', type=int, default=3,
                   help='Maximum depth of a tree')

args = parser.parse_args()
run.log('Learning rate', np.float(args.learning_rate))
run.log('Gamma', np.float(args.gamma))
run.log('Maximum depth', np.float(args.max_depth))

model = XGBRegressor(learning_rate=args.learning_rate, gamma=args.gamma, max_depth=args.max_depth, objective='reg:squarederror')
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)

run.log("r2_score", np.float(r2))
print(f'Writting r2 score = {r2} into a log.')

os.makedirs('./outputs', exist_ok=True)
joblib.dump(model, './outputs/model.joblib')

Writing train_xgb.py


In [50]:
#! python train_xgb.py

X_train.shape = (1095, 79), X_test.shape = (365, 79)
Attempted to log scalar metric Learning rate:
0.1
Attempted to log scalar metric Gamma:
2.0
Attempted to log scalar metric Maximum depth:
3.0
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \
Attempted to log scalar metric r2_score:
0.9165099581562921
Writting r2 score = 0.9165099581562921 into a log.


In [6]:
%%writefile conda_env.yml

# Conda environment for running train_xgb.py on the remote compute target.
# pandas, numpy and joblib are imported directly by the training script, so
# they are declared explicitly instead of relying on transitive installs.
dependencies:
- python=3.6.2
- pip
- pip:
  - azureml-defaults==1.32.0
- scikit-learn
- xgboost
- pandas
- numpy
- joblib

Writing conda_env.yml


In [7]:
# Build the Azure ML environment from the conda specification written above.
# Dependencies are the same as for AutoML experiment
env = Environment.from_conda_specification(name='env', file_path='conda_env.yml')

# Describe the training job: which script to run, from which folder, on which
# compute target and in which environment.
# For a quick manual test, fixed hyperparameters can be passed with e.g.
#   arguments=['--learning_rate', 0.01, '--gamma', 5, '--max_depth', 5]
src = ScriptRunConfig(
    source_directory=".",
    script='train_xgb.py',
    compute_target=cpu_cluster,
    environment=env,
)

## Hyperdrive Configuration

TODO: Explain the model you are using and the reason for choosing the different hyperparameters, termination policy and config settings.

In [8]:
# Name of the experiment that groups all HyperDrive runs of this notebook.
experiment_name = 'Ames-housing-hdr'

experiment = Experiment(workspace=ws, name=experiment_name)

In [16]:
# Test the script
# run = experiment.submit(src)

# Natural logarithms of the learning-rate limits 0.01 and 0.2; the rounded
# values (-4.6, -1.6) are the bounds given to loguniform() in the sampler.
np.log([0.01, 0.2])

array([-4.60517019, -1.60943791])

In [20]:
# TODO: Create an early termination policy. This is not required if you are using Bayesian sampling.
# Early termination: Bandit policy with a slack factor of 0.1, evaluated
# every 2 intervals.
policy = BanditPolicy(slack_factor=0.1, evaluation_interval=2)

#TODO: Create the different params that you will be using during training
# Search space: each key is a command-line flag of train_xgb.py.
search_space = {
    '--learning_rate': loguniform(-4.6, -1.6),
    '--gamma': uniform(0, 9),
    '--max_depth': choice(3, 5, 7),
}
ps = RandomParameterSampling(search_space)

#TODO: Create your estimator and hyperdrive config
# The run configuration `src` is defined in an earlier cell.

# Combine the run configuration, the sampler and the policy. The primary
# metric 'r2_score' is the name logged by the training script.
hyperdrive_settings = dict(
    run_config=src,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='r2_score',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,
    max_concurrent_runs=4,
    max_duration_minutes=30,
)
hyperdrive_config = HyperDriveConfig(**hyperdrive_settings)

In [21]:
#TODO: Submit your experiment
# Start the HyperDrive sweep; `hdr` is the handle of the parent run.
hdr = experiment.submit(hyperdrive_config)

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [22]:
# Show the run-details widget for the sweep, then block until the sweep ends
# while streaming its log output.
run_details_widget = RunDetails(hdr)
run_details_widget.show()
hdr.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_8ae1c9cf-f6e9-4c9e-99f9-cd4ba110d790
Web View: https://ml.azure.com/runs/HD_8ae1c9cf-f6e9-4c9e-99f9-cd4ba110d790?wsid=/subscriptions/81cefad3-d2c9-4f77-a466-99a7f541c7bb/resourcegroups/aml-quickstarts-154042/workspaces/quick-starts-ws-154042&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-08-09T09:16:37.055359][API][INFO]Experiment created<END>\n""<START>[2021-08-09T09:16:37.599338][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-08-09T09:16:37.815738][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"


## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [None]:

# Get your best run and save the model from that run.
# A HyperDrive parent run has no get_output() (that belongs to AutoML runs);
# the best child run is selected by the primary metric configured for the sweep.
best_run = hdr.get_best_run_by_primary_metric()
if best_run is None:
    raise RuntimeError("No HyperDrive child run has reported the primary metric 'r2_score'.")

print(best_run)
# Command-line arguments (i.e. the sampled hyperparameters) of the best run.
print(best_run.get_details()['runDefinition']['arguments'])

In [None]:
# Fetch the metrics logged by the best run and display them as the cell output.
best_run_metrics = best_run.get_metrics()
best_run_metrics

In [None]:
details = best_run.get_details()

# Keep the metrics (as JSON) and the run details (as their string form) on
# disk for ex-post examination.
with open('best_hdr_metrics.json', 'w') as metrics_file:
    metrics_file.write(json.dumps(best_run_metrics))
with open('best_hdr_details.txt', 'w') as details_file:
    print(details, end='', file=details_file)

In [None]:
#TODO: Save the best model

# List the files stored with the best run, numbered, to locate the model file.
for position, file_name in enumerate(best_run.get_file_names()):
    print(position, file_name)

In [None]:
# Save the best model
# Select the artifacts by their 'outputs/' prefix instead of by position in
# the file listing: the positions depend on how many log files the run has.
output_files = [name for name in best_run.get_file_names() if name.startswith('outputs/')]
if not output_files:
    raise FileNotFoundError("The best run has no files under 'outputs/'.")

for name in output_files:
    print(name)
    # Mirror the artifact's relative path locally, e.g. ./outputs/model.joblib,
    # and pass a full file path (not a directory) as the download target.
    local_path = os.path.join('.', *name.split('/'))
    os.makedirs(os.path.dirname(local_path), exist_ok=True)
    best_run.download_file(name, output_file_path=local_path)

## Model Deployment

Remember you have to deploy only one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [None]:
# Register the best model
# The training script stores the model as outputs/model.joblib, so that is the
# artifact to register. Registering from the run keeps the link between the
# model and the run that produced it and needs no local copy of the file.
model = best_run.register_model(
    model_name='Ames-Housing-HyperDrive-Model',
    model_path='outputs/model.joblib',
    # Tag values are stored as strings, so the numeric metrics are converted.
    tags={metric: str(value) for metric, value in best_run_metrics.items()},
)
print(model.name, model.id, model.version, sep='\t')

TODO: In the cell below, send a request to the web service you deployed to test it.

TODO: In the cell below, print the logs of the web service and delete the service

In [59]:
# Delete() is used to deprovision and delete the AmlCompute target.
# Run this only after all runs that use `cpu_cluster` have finished.
cpu_cluster.delete()

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

