# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
import train_xgb # The module for loading external data and training an XGB model
import os
import pandas as pd
import numpy as np
import json
import ast
import pickle
import joblib

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import Workspace, Dataset, Experiment, Model, Environment, ScriptRunConfig
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.widgets import RunDetails

from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, loguniform, choice

In [2]:
# Load the workspace via Workspace.from_config() and print its identifying fields, one per line.
ws = Workspace.from_config()
for workspace_field in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_field)

quick-starts-ws-154570
aml-quickstarts-154570
southcentralus
510b94ba-e453-4417-988b-fbdc37b55ca7


In [3]:
# Name of the CPU compute cluster used for training
cpu_cluster_name = "cpu-cluster"

# Reuse the cluster when it can be looked up in the workspace; provision it otherwise
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        min_nodes=1,
        max_nodes=5,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster operation has completed, streaming its progress
cpu_cluster.wait_for_completion(show_output=True)

InProgress......
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded.............................................
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Dataset

TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [4]:
# # Try to load the dataset from the workspace. Otherwise, load it from Kaggle
# found = False
# ds_key = 'Ames-housing-dataset'
# ds_desc = 'Ames Housing training data.'

# if ds_key in ws.datasets.keys():
#     found = True
#     dataset = ws.datasets[ds_key]
#     print(f'Found registered {ds_key}, use it.')
    
# if not found:
#     train, test = ames.load_data_clean()
#     print(f"train.shape = {train.shape}, test.shape = {test.shape}")
#     # Register the train dataset
#     blob = ws.get_default_datastore()
#     dataset = TabularDatasetFactory.register_pandas_dataframe(train, blob, name=ds_key, description=ds_desc)

In [5]:
# Test the train script
# ! python train_xgb.py

## Hyperdrive Configuration

TODO: Explain the model you are using and the reason for choosing the different hyperparameters, termination policy and config settings.

In [5]:
# Experiment under which the HyperDrive runs are submitted
experiment_name = 'Ames-housing-hdr'
experiment = Experiment(workspace=ws, name=experiment_name)

In [8]:
%%writefile conda_env.yml

# Conda specification written to conda_env.yml by the %%writefile magic above.
# The notebook reads this file for both the training and the inference environment.
dependencies:
- python=3.6.2
# Packages installed through pip
- pip:
  - inference-schema
  - azureml-defaults==1.32.0
# Packages installed through conda
- numpy>=1.16.0,<1.19.0
- pandas==0.25.1
- scikit-learn==0.22.1
- py-xgboost<=0.90
# Channels listed for resolving the conda packages
channels:
- anaconda
- conda-forge

Writing conda_env.yml


In [7]:
# Azure ML environment for the training runs, built from the conda specification file.
# Dependencies are the same as for AutoML experiment.
env = Environment.from_conda_specification(name='env', file_path='conda_env.yml')

# Training job: run train_xgb.py from the current directory on the CPU cluster.
# For a quick manual test, fixed hyperparameters can be passed with e.g.
#   arguments=['--learning_rate', 0.01, '--gamma', 5, '--max_depth', 5]
src = ScriptRunConfig(
    source_directory=".",
    script='train_xgb.py',
    environment=env,
    compute_target=cpu_cluster,
)

In [9]:
# Test the script run config
# run = experiment.submit(src)

In [8]:
# Early termination policy (not required when Bayesian sampling is used):
# a bandit policy with an evaluation interval of 2 and a slack factor of 0.1.
policy = BanditPolicy(slack_factor=0.1, evaluation_interval=2)

# Hyperparameter search space, passed to the training script as command-line arguments
param_space = {
    '--learning_rate': loguniform(-4.6, -1.6),  # exp(-4.6) ~ 0.01, exp(-1.6) ~ 0.2
    '--gamma': uniform(0, 9),
    '--max_depth': choice(3, 5, 7),
}
ps = RandomParameterSampling(param_space)

# HyperDrive configuration: combines the script run config (src), the sampler and the
# policy, and maximizes the 'r2_score' metric within the run and time limits below.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='r2_score',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,
    max_concurrent_runs=4,
    max_duration_minutes=30,
)

In [9]:
# Submit the HyperDrive configuration to the experiment and keep the resulting run handle
hdr = experiment.submit(hyperdrive_config)

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [10]:
# Render the HyperDrive run in the RunDetails widget, then block until the run has finished.
# The value returned by wait_for_completion is the cell output.
run_widget = RunDetails(hdr)
run_widget.show()
hdr.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_b99cc027-08a8-463d-a93e-5550b0bd5482
Web View: https://ml.azure.com/runs/HD_b99cc027-08a8-463d-a93e-5550b0bd5482?wsid=/subscriptions/9e65f93e-bdd8-437b-b1e8-0647cd6098f7/resourcegroups/aml-quickstarts-154426/workspaces/quick-starts-ws-154426&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-08-13T13:18:38.472210][API][INFO]Experiment created<END>\n""<START>[2021-08-13T13:18:39.035572][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-08-13T13:18:39.545835][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_b99cc027-08a8-463d-a93e-5550b0bd5482
Web View: https://ml.azure.com/runs/HD_b99cc027-08a8-463d-a93e-5550b0bd5482?wsid=/subscriptions/9e65f93e-bdd8-437b-b1e8-0647cd6098f7/resourcegroups/aml-quickstarts-154426/workspaces/quick-starts-ws-154426&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254



{'runId': 'HD_b99cc027-08a8-463d-a93e-5550b0bd5482',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-08-13T13:18:38.104412Z',
 'endTimeUtc': '2021-08-13T13:36:19.242176Z',
 'properties': {'primary_metric_config': '{"name": "r2_score", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'ed6c4598-294e-405d-b769-a3ec5b731f05',
  'user_agent': 'python/3.6.9 (Linux-5.4.0-1055-azure-x86_64-with-debian-buster-sid) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.32.0',
  'score': '0.912108968001802',
  'best_child_run_id': 'HD_b99cc027-08a8-463d-a93e-5550b0bd5482_8',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg154426.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_b99cc027-08a8-463d-a93e-5550b0bd5482/azureml-logs/hyperdrive.txt?sv=2019-07-07&sr=b&si

## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [11]:
# Select the child run with the best primary metric (r2_score, maximized).
best_run = hdr.get_best_run_by_primary_metric()
if best_run is None:
    # No child run logged the primary metric, so there is no best model to retrieve.
    # Fail here with a clear message instead of an AttributeError in a later cell.
    raise RuntimeError("No child run of the HyperDrive run reported the primary metric 'r2_score'.")
print(best_run)

Run(Experiment: Ames-housing-hdr,
Id: HD_b99cc027-08a8-463d-a93e-5550b0bd5482_8,
Type: azureml.scriptrun,
Status: Completed)


In [12]:
# Metrics logged by the best child run; the bare expression below renders them as the cell output.
best_run_metrics = best_run.get_metrics()
best_run_metrics

{'Learning rate': 0.15327362795270094,
 'Gamma': 6.785822376960304,
 'Maximum depth': 7.0,
 'r2_score': 0.912108968001802}

In [13]:
# Details record of the best run
details = best_run.get_details()

# Persist the metrics (as JSON) and the details (as plain text) under ./outputs
# so they can be examined after the session has ended.
os.makedirs('./outputs', exist_ok=True)
with open('outputs/best_hdr_metrics.json', 'w') as metrics_file:
    metrics_file.write(json.dumps(best_run_metrics))
with open('outputs/best_hdr_details.txt', 'w') as details_file:
    details_file.write(str(details))

In [14]:
# Show the last entry of the best run's file list as the cell output.
best_run.get_file_names()[-1]

'outputs/model.pkl'

In [15]:
# Save the best model.
# The artifact is addressed by its explicit name instead of its position in
# get_file_names(), so the download does not depend on the order in which the
# run's files happen to be listed.
model_artifact = 'outputs/model.pkl'
os.makedirs('./outputs', exist_ok=True)
best_run.download_file(model_artifact, output_file_path='./outputs/model.pkl')

## Model Deployment

Remember you have to deploy only one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [6]:
# Reload the metrics saved earlier from the JSON file under ./outputs
with open('outputs/best_hdr_metrics.json', 'r') as metrics_file:
    best_run_metrics = json.loads(metrics_file.read())

In [7]:
# Register the downloaded model file in the workspace, with the best run's metrics as tags
model = Model.register(
    workspace=ws,
    model_path='outputs/model.pkl',
    model_name='Ames-Housing-XGB-Model',
    tags=best_run_metrics,
)
print(model.name, model.id, model.version, sep='\t')

Registering model Ames-Housing-XGB-Model
Ames-Housing-XGB-Model	Ames-Housing-XGB-Model:1	1


In [9]:
from azureml.core.webservice import AciWebservice

# Deployment configuration for an Azure Container Instance web service:
# one CPU core, 1 GB of memory, key authentication and Application Insights enabled.
aci_tags = {"data" : "Kaggle", "method" : "XGB"}
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    tags=aci_tags,
    description="Predict Ames Housing Prices",
    auth_enabled=True,
    enable_app_insights=True,
)

In [10]:
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.environment import Environment

# Inference environment whose conda dependencies come from conda_env.yml
env = Environment(name="project-env")
conda_deps = CondaDependencies(conda_dependencies_file_path='conda_env.yml')
env.python.conda_dependencies = conda_deps

# Register the environment in the workspace so it can be fetched by name later;
# the registered definition is shown as the cell output.
env.register(workspace=ws)

{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "project-env",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "anaconda",
                "conda-forge"
  

In [17]:
%%time
import uuid
from azureml.core.webservice import Webservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core import Workspace
from azureml.core.model import Model

ws = Workspace.from_config()
model = Model(ws, 'Ames-Housing-XGB-Model')

myenv = Environment.get(workspace=ws, name="project-env", version="1")

inference_config = InferenceConfig(entry_script="entry_script.py", environment=myenv)

service_name = 'ames-housing-xgb-' + str(uuid.uuid4())[:4]
service = Model.deploy(workspace=ws,
                      name=service_name,
                      models=[model],
                      inference_config=inference_config,
                      deployment_config=aciconfig)

service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-08-15 08:32:57+00:00 Creating Container Registry if not exists.
2021-08-15 08:32:57+00:00 Registering the environment.
2021-08-15 08:32:59+00:00 Use the existing image.
2021-08-15 08:32:59+00:00 Generating deployment configuration.
2021-08-15 08:33:00+00:00 Submitting deployment to compute.
2021-08-15 08:33:03+00:00 Checking the status of deployment ames-housing-xgb-7a9c..
2021-08-15 08:35:48+00:00 Checking the status of inference endpoint ames-housing-xgb-7a9c.
Succeeded
ACI service creation operation finished, operation "Succeeded"
CPU times: user 512 ms, sys: 39.4 ms, total: 552 ms
Wall time: 2min 57s


In [18]:
# Print the logs of the deployed web service
print(service.get_logs())

2021-08-15T08:35:34,052374100+00:00 - iot-server/run 
2021-08-15T08:35:34,051028900+00:00 - gunicorn/run 
File not found: /var/azureml-app/.
Starting HTTP server
2021-08-15T08:35:34,059049300+00:00 - rsyslog/run 
2021-08-15T08:35:34,068409200+00:00 - nginx/run 
EdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...
2021-08-15T08:35:34,439479000+00:00 - iot-server/finish 1 0
2021-08-15T08:35:34,442017800+00:00 - Exit code 1 is normal. Not restarting iot-server.
Starting gunicorn 20.1.0
Listening at: http://127.0.0.1:31311 (57)
Using worker: sync
worker timeout is set to 300
Booting worker with pid: 88
SPARK_HOME not set. Skipping PySpark Initialization.
Initializing logger
2021-08-15 08:35:35,976 | root | INFO | Starting up app insights client
logging socket was found. logging is available.
logging socket was found. logging is available.
2021-08-15 08:35:35,977 | root | INFO | Starting up request id generator
2021-08-15 08:35:35,978 | root | INFO | Starting up app in

TODO: In the cell below, send a request to the web service you deployed to test it.

In [13]:
from azureml.core.authentication import InteractiveLoginAuthentication

# Obtain an authentication header through an interactive login.
# NOTE(review): auth_header is not referenced by any later cell of this notebook
# (the scoring request uses the service API key) — confirm whether this cell is still needed.
interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()

In [22]:
# Build the request payload from the rows returned by head() of the label-encoded test set
_, test_raw = train_xgb.load_data_clean()
test = train_xgb.label_encode(test_raw)
sample_rows = test.head()
data = {'data': sample_rows.to_dict(orient='list')}

# The next cell can be replaced with the code from the 'Consume' tab of the endpoint;
# in that case delete its 'data = {}' assignment, because data is already defined here.

In [23]:
import json
import os
import ssl
import urllib.error
import urllib.request


def allowSelfSignedHttps(allowed):
    """Bypass server certificate verification on the client side.

    The default HTTPS context of the `ssl` module is replaced by an unverified
    one only when `allowed` is truthy, the PYTHONHTTPSVERIFY environment
    variable is unset or empty, and `ssl` provides `_create_unverified_context`.
    """
    if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context


allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

body = str.encode(json.dumps(data))

# Read the endpoint and the key from the deployed service object instead of
# pasting them into the notebook, so no secret is stored in the file.
url = service.scoring_uri
api_key = service.get_keys()[0]  # first key of the pair returned by get_keys()
headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key)}

req = urllib.request.Request(url, body, headers)

try:
    # The context manager closes the HTTP response once the body has been read
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    error_body = error.read().decode("utf8", 'ignore')
    try:
        print(json.loads(error_body))
    except ValueError:
        # The error body is not valid JSON; show it unparsed rather than raising here
        print(error_body)

b'"{\\"result\\": [125541.265625, 160472.15625, 190638.34375, 196659.453125, 188028.796875]}"'


In [35]:
# Decode the result for subsequent processing.
# The response body is a JSON string that itself holds a JSON document, so it is
# parsed twice with json.loads. A Python-literal parser would reject valid JSON
# tokens such as null, true or NaN.
json.loads(json.loads(result))['result']

[125541.265625, 160472.15625, 190638.34375, 196659.453125, 188028.796875]

TODO: In the cell below, print the logs of the web service and delete the service

In [36]:
service.delete()
# Delete() is used to deprovision and delete the AmlCompute target. 
cpu_cluster.delete()