# Hyperparameter Tuning using HyperDrive

In [50]:
import json
import requests
import os
import joblib

from azureml.core import Dataset, Workspace, Experiment
from azureml.core.compute import ComputeTarget
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive import BayesianParameterSampling
from azureml.train.hyperdrive import uniform, choice
from azureml.core.compute import ComputeTarget
from azureml.train.hyperdrive.runconfig import HyperDriveConfig

from azureml.core.environment import Environment
from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig, Model

## Dataset

In [2]:
# Connect to the Azure ML workspace using the locally stored configuration.
ws = Workspace.from_config()

# Experiment that groups the HyperDrive runs of this notebook.
experiment_name = 'hd-heart-disease'
experiment = Experiment(workspace=ws, name=experiment_name)

# Look the dataset up in the workspace by its name.
dataset = Dataset.get_by_name(workspace=ws, name='heart-disease-uci')

In [3]:
# Attach to the previously created compute cluster by its name.
compute_cluster_name = 'RAM-cluster'
compute_cluster = ComputeTarget(ws, compute_cluster_name)

## Hyperdrive Configuration


Because we use Bayesian parameter sampling, no early termination policy is configured. This sampling method was chosen so that the search can find a very good model to compete with the one generated by the AutoML approach.

The primary metric goal is to maximize Accuracy, which also allows a direct comparison with the AutoML run.

We will use the same compute cluster as before, in order to maximize performance. The maximum number of runs is set to 20 times the number of hyperparameters being varied, as recommended in the Azure ML documentation. The maximum number of concurrent runs is set to 4, in order to get better convergence with the Bayesian sampling method (more info here: https://docs.microsoft.com/en-us/azure/machine-learning/how-to-tune-hyperparameters#define-search-space)

In [4]:
# HyperDrive ranks the child runs by this metric, preferring higher values.
primary_metric_goal = PrimaryMetricGoal.MAXIMIZE
primary_metric_name = "Accuracy"  # presumably the name train.py logs the metric under - verify

In [38]:
# Hyperparameters explored by the sweep and the distribution each is drawn from.
search_space = {
    "n_estimators": choice(range(5, 200)),
    "min_weight_fraction_leaf": uniform(0.0, 0.5),
}
param_sampling = BayesianParameterSampling(search_space)

# Run budget: 20 runs per tuned hyperparameter. Deriving it from the search
# space keeps the budget correct when hyperparameters are added or removed
# (with the two hyperparameters above this evaluates to 40).
runs_per_hyperparameter = 20
max_total_runs = runs_per_hyperparameter * len(search_space)

# Training job definition: runs train.py from this directory on the cluster.
estimator = SKLearn(source_directory='.',
                    entry_script='train.py',
                    compute_target=compute_cluster)

# No early termination policy is passed, matching the Bayesian sampling setup
# described in the notebook text.
hyperdrive_run_config = HyperDriveConfig(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    primary_metric_name=primary_metric_name,
    primary_metric_goal=primary_metric_goal,
    max_total_runs=max_total_runs,
    max_concurrent_runs=4,
)

In [39]:
# Launch the HyperDrive sweep under the experiment and keep the parent run handle.
hp_run = experiment.submit(config=hyperdrive_run_config, show_output=True)



## Run Details

In [40]:
# Show the monitoring widget for the HyperDrive parent run.
run_details_widget = RunDetails(hp_run)
run_details_widget.show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

## Best Model

In [41]:
# Select the child run with the best value of the primary metric.
best_hp_run = hp_run.get_best_run_by_primary_metric()

# The lookup yields None when no child run reported the primary metric; stop
# here with a clear message instead of failing later with an AttributeError.
if best_hp_run is None:
    raise RuntimeError(
        "HyperDrive found no child run that logged the primary metric; "
        "check the child run logs before selecting a best model."
    )

In [43]:
# Display the best run's summary (experiment, id, type, status and links).
best_hp_run

Experiment,Id,Type,Status,Details Page,Docs Page
hd-heart-disease,HD_0c6314de-3716-488a-925c-4ccc77ab6fcb_25,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [48]:
# Metrics logged by the best child run and the command-line arguments it ran with.
best_run_metrics = best_hp_run.get_metrics()
parameter_values = best_hp_run.get_details()['runDefinition']['arguments']

# The arguments form a flat [flag, value, flag, value, ...] list. Pair them up
# and index by flag name (leading dashes removed) so the report does not
# depend on the order in which the hyperparameters were passed.
hyperparameters = {
    flag.lstrip('-'): value
    for flag, value in zip(parameter_values[0::2], parameter_values[1::2])
}

print('Best Run Id: ', best_hp_run.id)
print('\n Accuracy:', best_run_metrics['Accuracy'])
print('\n n_estimators:', hyperparameters['n_estimators'])
print('\n min_weight_fraction_leaf:', hyperparameters['min_weight_fraction_leaf'])

Best Run Id:  HD_0c6314de-3716-488a-925c-4ccc77ab6fcb_25

 Accuracy: 0.9016393442622951

 n_estimators: 106

 min_weight_fraction_leaf: 0.15096786229769743


## Model Deployment

In [75]:
# Registering model
# The saved model's file name apparently embeds the run's accuracy (it matches
# the value printed for the best run), so a hard-coded path only works for one
# specific sweep result. Locate the serialized model among the best run's
# files instead, so the cell survives a fresh Restart & Run All.
model_files = [
    file_name for file_name in best_hp_run.get_file_names()
    if file_name.startswith('outputs/') and file_name.endswith('.joblib')
]
if len(model_files) != 1:
    raise RuntimeError(
        'Expected exactly one .joblib file under outputs/ of the best run, found: '
        + repr(model_files)
    )

reg_model = best_hp_run.register_model(model_name='HD-heart-disease',
                                       tags={'hyperdrive': 'heart-disease'},
                                       model_path=model_files[0])

In [76]:
# Fetch the named environment from the workspace for the scoring service.
my_env = Environment.get(workspace=ws, name='AzureML-Scikit-learn-0.20.3')

In [77]:
# Container sizing for the ACI web service: one CPU core and 0.5 GB of memory.
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=0.5)

# Scoring entry point together with the environment it runs in.
inference_config = InferenceConfig(
    entry_script="score.py",
    environment=my_env,
)

In [78]:
# Look the registered model up by name and deploy it as the 'predict-heart' service.
model = Model(workspace=ws, name='HD-heart-disease')
service = Model.deploy(
    workspace=ws,
    name='predict-heart',
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config,
)

# Wait for the deployment to finish while showing its progress, then report
# the resulting state and the endpoint to call.
service.wait_for_deployment(show_output=True)
print(service.state)
print("scoring URI: " + service.scoring_uri)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running.........................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy
scoring URI: http://b9dcd8d7-0d76-49ef-b789-a49b31cc212c.eastus.azurecontainer.io/score


**Testing the web service**

In [108]:
url = service.scoring_uri

# A single record to score, sent as a list containing one row of 13 values.
# NOTE(review): presumably the features in the dataset's column order, as
# expected by score.py - confirm.
testing_data = [[
    62,
    1,
    3,
    145,
    233,
    1,
    0,
    150,
    0,
    2.1,
    0,
    0,
    1,
]]

headers = {'Content-type': 'application/json'}

# Bound the request so an unresponsive endpoint cannot hang the notebook, and
# fail on an HTTP error status before trying to decode the body as JSON.
response = requests.post(url, data=json.dumps(testing_data), headers=headers, timeout=60)
response.raise_for_status()

print(response.json())

[1]


In [109]:
# Fetch the web service's logs; as the cell's last expression, the returned
# string is shown as the cell output.
service.get_logs()

'/bin/bash: /azureml-envs/azureml_cfb84a87600b73f3f87eb83242ac9b5d/lib/libtinfo.so.5: no version information available (required by /bin/bash)\n/bin/bash: /azureml-envs/azureml_cfb84a87600b73f3f87eb83242ac9b5d/lib/libtinfo.so.5: no version information available (required by /bin/bash)\n/bin/bash: /azureml-envs/azureml_cfb84a87600b73f3f87eb83242ac9b5d/lib/libtinfo.so.5: no version information available (required by /bin/bash)\nbash: /azureml-envs/azureml_cfb84a87600b73f3f87eb83242ac9b5d/lib/libtinfo.so.5: no version information available (required by bash)\n/bin/bash: /azureml-envs/azureml_cfb84a87600b73f3f87eb83242ac9b5d/lib/libtinfo.so.5: no version information available (required by /bin/bash)\n2020-11-07T21:45:41,432975064+00:00 - rsyslog/run \n2020-11-07T21:45:41,444155101+00:00 - nginx/run \n2020-11-07T21:45:41,445128005+00:00 - gunicorn/run \n2020-11-07T21:45:41,454862237+00:00 - iot-server/run \n/usr/sbin/nginx: /azureml-envs/azureml_cfb84a87600b73f3f87eb83242ac9b5d/lib/libcrypt

In [110]:
# Delete the deployed web service now that it has been tested.
service.delete()