# Hyperparameter Tuning using HyperDrive

Importing Dependencies

In [1]:
import logging
import os
import json
import csv
import numpy as np
import pandas as pd
import joblib
import requests

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import ScriptRunConfig
from azureml.widgets import RunDetails
from azureml.core import Model, Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
from azureml.core.conda_dependencies import CondaDependencies

# Print the installed Azure ML core SDK version so it is recorded in the notebook output.
print("SDK version:", azureml.core.VERSION)

SDK version: 1.38.0


## Dataset

The dataset chosen is the UCI heart disease data from Kaggle. It contains measured features and attributes of individuals, together with a label indicating whether or not each individual has heart disease. The purpose of using this dataset is to build a model that predicts whether an individual is likely to have heart disease, based on the same measured features.

In [2]:
# Connect to the workspace described by the local config file and echo its identity,
# one field per line.
ws = Workspace.from_config()
print("\n".join(map(str, (ws.name, ws.resource_group, ws.location, ws.subscription_id))))

# Experiment under which the HyperDrive run is submitted.
experiment_name = 'hyperdrive-exp1'
experiment = Experiment(workspace=ws, name=experiment_name)

quick-starts-ws-187769
aml-quickstarts-187769
southcentralus
81cefad3-d2c9-4f77-a466-99a7f541c7bb


In [3]:
# Look up the registered dataset by name and summarise it with pandas.
key = "heartdisease"
description_text = ""

# Fail early with an actionable message: without this guard a missing dataset
# surfaces later as an unrelated NameError on `dataset`.
found = key in ws.datasets.keys()
if not found:
    raise ValueError(
        f"Dataset '{key}' is not registered in workspace '{ws.name}'. "
        "Register the heart disease dataset under this name before running the notebook."
    )
dataset = ws.datasets[key]

# Materialise the dataset as a DataFrame; the summary statistics are the cell output.
df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0
mean,54.366337,0.683168,0.966997,131.623762,246.264026,0.148515,0.528053,149.646865,0.326733,1.039604,1.39934,0.729373,2.313531,0.544554
std,9.082101,0.466011,1.032052,17.538143,51.830751,0.356198,0.52586,22.905161,0.469794,1.161075,0.616226,1.022606,0.612277,0.498835
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,47.5,0.0,0.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,2.0,0.0
50%,55.0,1.0,1.0,130.0,240.0,0.0,1.0,153.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,274.5,0.0,1.0,166.0,1.0,1.6,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


In [6]:
# Reuse the named compute cluster when it already exists; otherwise provision it.
amlcompute_cluster_name = "hd-cluster"

try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    # The lookup failed, so create a low-priority cluster with up to 5 nodes.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS12_V2',
        vm_priority='lowpriority',
        max_nodes=5,
    )
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)
    print('Cluster created successfully.')
else:
    print('Existing cluster found, this cluster will be used.')

# Wait for the cluster operation to finish before continuing.
compute_target.wait_for_completion()

Cluster created successfully.
Provisioning operation finished, operation "Succeeded"


## Hyperdrive Configuration

A scikit-learn logistic regression model is used. The two tuned hyperparameters are the inverse regularization strength (C) and the maximum number of iterations (max_iter). The early termination policy is the bandit policy, which avoids spending resources on training runs that are not performing significantly well. Accuracy is chosen as the primary metric to allow a meaningful comparison with the AutoML models. The script `train.py` is the training entry point, and it takes the dataset ID as an input.

In [28]:
# Declare the package dependencies: azureml-defaults from pip, the rest from conda.
conda_env = CondaDependencies()
conda_env.add_pip_package("azureml-defaults")
for conda_package in ("numpy", "pandas", "scikit-learn"):
    conda_env.add_conda_package(conda_package)

# Attach the dependencies to a named Azure ML environment.
environment = Environment(name="azure-hd")
environment.python.conda_dependencies = conda_env

In [64]:
# Search space: random sampling over discrete choices for the two tuned hyperparameters.
param_sampling = RandomParameterSampling({
    '--C': choice(0.001, 0.01, 0.1, 1.0, 10.0, 100.0),
    '--max_iter': choice(10, 25),
})

# Early-termination policy: bandit with a slack factor of 0.1 and an evaluation interval of 2.
early_termination_policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Run configuration: train.py from the current directory, run on the named cluster,
# with the dataset id passed through the --data argument.
estimator = ScriptRunConfig(
    source_directory=".",
    script='train.py',
    arguments=['--data', dataset.id],
    compute_target=amlcompute_cluster_name,
    environment=environment,
)

# HyperDrive configuration: maximise 'Accuracy' over at most 24 runs, 4 at a time.
hyperdrive_run_config = HyperDriveConfig(
    run_config=estimator,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=24,
    max_concurrent_runs=4,
)

In [65]:
# Submit the HyperDrive configuration to the experiment; `run` is the handle used by the cells below.
run = experiment.submit(hyperdrive_run_config)

## Run Details

In [67]:
# Show the run-details widget for the submitted HyperDrive run.
RunDetails(run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [40]:
# Block until the HyperDrive run finishes; the returned run details are the cell output.
run.wait_for_completion()

{'runId': 'HD_e7e5fcd4-81de-4a74-a89d-71bfea8ee6ab',
 'target': 'hd-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-03-01T21:35:59.170258Z',
 'endTimeUtc': '2022-03-01T21:42:38.754089Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '1de1690d-b3a0-427b-a526-432ef161119d',
  'user_agent': 'python/3.6.9 (Linux-5.4.0-1068-azure-x86_64-with-debian-buster-sid) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.38.0',
  'space_size': '12',
  'score': '0.881578947368421',
  'best_child_run_id': 'HD_e7e5fcd4-81de-4a74-a89d-71bfea8ee6ab_8',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg187769.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_e7e5fcd4-81de-4a74-a89d-71bfea8ee6ab/azureml-log

## Best Model

In [68]:
# Select the child run with the best primary metric and list the files it produced.
best_run = run.get_best_run_by_primary_metric()
best_run.get_file_names()

['logs/azureml/dataprep/backgroundProcess.log',
 'logs/azureml/dataprep/backgroundProcess_Telemetry.log',
 'logs/azureml/dataprep/rslex.log',
 'outputs/model.joblib',
 'system_logs/cs_capability/cs-capability.log',
 'system_logs/hosttools_capability/hosttools-capability.log',
 'system_logs/lifecycler/execution-wrapper.log',
 'system_logs/lifecycler/lifecycler.log',
 'system_logs/lifecycler/vm-bootstrapper.log',
 'user_logs/std_log.txt']

In [70]:
# Download the trained model by its explicit artifact path. Indexing into
# get_file_names() is fragile: the position of 'outputs/model.joblib' depends on
# how many log files happen to precede it in the listing.
best_run.download_file('outputs/model.joblib', 'hd_model.joblib')

# Load the downloaded estimator; its repr is the cell output.
# NOTE(review): joblib.load unpickles the file — only load artifacts from trusted runs.
joblib.load('hd_model.joblib')

Trying to unpickle estimator LogisticRegression from version 0.23.2 when using version 0.22.2.post1. This might lead to breaking code or invalid results. Use at your own risk.


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=25,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

## Model Deployment

This model performed better than the AutoML model, so it was registered and deployed. The endpoint was then consumed, and the logs for the service are printed in the cells below.

In [77]:
# Register the downloaded model so that `model` is defined for the deployment below.
# With this line commented out the cell fails with a NameError on a fresh kernel.
model = Model.register(ws, model_name='hd-best-model', model_path='hd_model.joblib')

# Scoring entry script and the environment the service container is built from.
inference_config = InferenceConfig(entry_script='hd_score.py',
                                   environment=environment)
service_name = 'hd-deploy1'
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1.8, memory_gb=4)

# Deploy as an ACI web service, replacing any existing service with the same name.
service = Model.deploy(workspace=ws,
                       name=service_name,
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=deployment_config,
                       overwrite=True
                      )
service.wait_for_deployment(show_output=True)

scoring_uri = service.scoring_uri
print(scoring_uri)

# Enable Application Insights on the running service and wait for the update to finish.
service.update(enable_app_insights=True)
service.wait_for_deployment(show_output=True)


Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-03-01 22:48:36+00:00 Creating Container Registry if not exists.
2022-03-01 22:48:36+00:00 Registering the environment.
2022-03-01 22:48:37+00:00 Use the existing image.
2022-03-01 22:48:37+00:00 Generating deployment configuration.
2022-03-01 22:48:38+00:00 Submitting deployment to compute.
2022-03-01 22:48:43+00:00 Checking the status of deployment hd-deploy1..
2022-03-01 22:50:36+00:00 Checking the status of inference endpoint hd-deploy1.
Succeeded
ACI service creation operation finished, operation "Succeeded"
http://7f7d3f9a-b48c-402a-91b1-bbca240c9caa.southcentralus.azurecontainer.io/score
Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-03-01 22:50:4

Two example records are used to test the deployment in two different ways in the cell below: by sending a POST request to the scoring endpoint, and by calling the service object directly.

In [93]:
# Two sample records using the same feature columns as the training data.
data = {"data": [{"age": 37,
        "sex": 0,
        "cp": 1,
        "trestbps": 150,
        "chol": 233,
        "fbs": 1,
        "restecg": 0,
        "thalach": 190,
        "exang": 0,
        "oldpeak": 0.6,
        "slope": 2,
        "ca": 0,
        "thal": 1},

    {"age": 56,
        "sex": 1,
        "cp": 0,
        "trestbps": 125,
        "chol": 249,
        "fbs": 1,
        "restecg": 0,
        "thalach": 144,
        "exang": 1,
        "oldpeak": 1.2,
        "slope": 1,
        "ca": 1,
        "thal": 2},
      ]}

# Convert to a JSON string and keep a copy on disk.
input_data = json.dumps(data)
with open("data.json", "w") as _f:
    _f.write(input_data)

# Set the content type
headers = {'Content-Type': 'application/json'}
# If authentication is enabled, set the authorization header with a key of the
# web service (e.g. from service.get_keys()). Do not use the notebook variable `key`:
# it holds the dataset name, not a credential.
# headers['Authorization'] = f'Bearer {service_key}'

# Make the request with a timeout so a hung endpoint cannot block the notebook forever,
# and fail loudly on an HTTP error instead of trying to parse an error page as JSON.
resp = requests.post(scoring_uri, data=input_data, headers=headers, timeout=60)
resp.raise_for_status()
print(resp.json())
print("Case 0: No Heart Disease, Case 1: Heart Disease.")

# Test again through the service object; its result is the cell output.
output = service.run(input_data)
output



[1, 0]
Case 0: No Heart Disease, Case 1: Heart Disease.


[1, 0]

In [95]:
# Print the logs rather than echoing the string: the bare expression shows a single
# repr with escaped "\n" sequences, which is unreadable for multi-line logs.
print(service.get_logs())

'2022-03-01T22:52:42,131298200+00:00 - gunicorn/run \nDynamic Python package installation is disabled.\nStarting HTTP server\n2022-03-01T22:52:42,140075200+00:00 - rsyslog/run \n2022-03-01T22:52:42,149824800+00:00 - nginx/run \n2022-03-01T22:52:42,145581100+00:00 - iot-server/run \nEdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...\n2022-03-01T22:52:42,525988500+00:00 - iot-server/finish 1 0\n2022-03-01T22:52:42,528909400+00:00 - Exit code 1 is normal. Not restarting iot-server.\nStarting gunicorn 20.1.0\nListening at: http://127.0.0.1:31311 (72)\nUsing worker: sync\nworker timeout is set to 300\nBooting worker with pid: 99\nSPARK_HOME not set. Skipping PySpark Initialization.\nInitializing logger\n2022-03-01 22:52:43,513 | root | INFO | Starting up app insights client\nlogging socket was found. logging is available.\nlogging socket was found. logging is available.\n2022-03-01 22:52:43,515 | root | INFO | Starting up request id generator\n2022-03-01 22:52:43,515

In [96]:
# Clean up: delete the deployed web service.
service.delete()

In [98]:
# Clean up: delete the compute cluster used for the HyperDrive runs.
compute_target.delete()

**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shutdown all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.

