# Hyperparameter Tuning using HyperDrive

In [57]:
import json
import os

import joblib
import requests

from azureml.core import ScriptRunConfig, Dataset, Workspace, Experiment, Environment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice
from azureml.widgets import RunDetails

from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import MedianStoppingPolicy
from azureml.train.hyperdrive.sampling import BayesianParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice

In [5]:
# Load the workspace from the local configuration and open the experiment
# under which all runs of this notebook are submitted.
experiment_name = 'mushroom'
ws = Workspace.from_config()
experiment = Experiment(workspace=ws, name=experiment_name)

## Dataset

In [66]:
key = "Mushrooms"

# Reuse the dataset registered under `key` when the workspace already has one;
# otherwise build it from the CSV file on GitHub and register it under `key`.
if key in ws.datasets:
    dataset = ws.datasets[key]
else:
    csv_url = 'https://raw.githubusercontent.com/sannif/udacity_capstone_project/main/dataset/mushrooms.csv'
    dataset = Dataset.Tabular.from_delimited_files(csv_url, infer_column_types=False)
    dataset = dataset.register(workspace=ws, name=key)

# Preview the first five rows.
dataset.take(5).to_pandas_dataframe()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,True,p,False,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,True,a,False,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,True,l,False,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,True,p,False,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,False,n,False,w,b,k,...,s,w,w,p,w,o,e,n,a,g


## Compute

In [7]:
compute_name = "cluster1"
vm_size = "Standard_DS12_v2"
min_nodes, max_nodes = 1, 6

# Provision the cluster only when the workspace has no compute target with
# this name; otherwise pick up the existing one.
if compute_name not in ws.compute_targets:
    print("creating new compute target...")
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=min_nodes,
        max_nodes=max_nodes,
    )
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)
else:
    compute_target = ws.compute_targets[compute_name]
    if compute_target and type(compute_target) is AmlCompute:
        print("found compute target: " + compute_name)

# Block until the target has finished provisioning.
compute_target.wait_for_completion(show_output=True)

creating new compute target...
Creating......
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded........................
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Hyperdrive Configuration
We fit a *Random Forest* classifier to the data. 4 hyperparameters are tuned:
* n_estimators: the number of trees
* max_depth: the maximum depth of a tree
* criterion: the function to measure the quality of a split
* min_samples_leaf: the minimum number of samples required to be at a leaf node.  

More information on the role of each hyperparameter can be found [here](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html).

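A *MedianStoppingPolicy* would make it possible to stop unpromising runs early and save costs, but Azure ML does not support early termination policies together with Bayesian sampling, so no termination policy is applied to the HyperDrive run. Bayesian sampling is used to sample the hyperparameter space. Finally, we limit the total number of runs to 25.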

In [96]:
# Search space for the four tuned Random Forest hyperparameters, explored with
# Bayesian sampling.
param_sampling = BayesianParameterSampling({
    'n_estimators': choice([50, 100, 250, 500]),
    'max_depth': choice([5, 6, 7, 8, 9, 10, 15]),
    'criterion': choice(['gini', 'entropy']),
    'min_samples_leaf': choice(range(1, 5))
})

# Environment fetched from the workspace; the same object is reused later for
# the inference configuration.
sklearn_env = Environment.get(ws, 'AzureML-lightgbm-3.2-ubuntu18.04-py37-cpu')

# The hyperparameter values in `arguments` are baseline values; the argument
# list of a child run shows the sampled values appended after them.
estimator = ScriptRunConfig(source_directory = 'scripts/',
                            script = 'train.py',
                            arguments=['--n_estimators', 100, '--max_depth', 6, '--criterion', 'gini', '--min_samples_leaf', 1],
                            compute_target = compute_target,
                            environment = sklearn_env)

# No early termination policy is configured: Azure ML does not support early
# termination policies together with Bayesian sampling, so `policy` is set to
# None explicitly (its default) and every child run trains to completion.
hyperdrive_run_config = HyperDriveConfig(run_config = estimator,
                                         hyperparameter_sampling = param_sampling,
                                         policy=None,
                                         primary_metric_name='accuracy',
                                         primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                         max_total_runs=25,
                                         max_concurrent_runs=6)

For best results with Bayesian Sampling we recommend using a maximum number of runs greater than or equal to 20 times the number of hyperparameters being tuned. Recommendend value:80.


In [97]:
# Submit the HyperDrive configuration to the experiment and keep the parent run.
hyperdrive_run = experiment.submit(config=hyperdrive_run_config)

## Run Details
All the runs performed very well, with an accuracy between 99.6% and 100%. It seems that the values of the hyperparameters did not have much impact on the performance of the models.

In [98]:
# Show the widget that tracks the HyperDrive run.
run_details_widget = RunDetails(hyperdrive_run)
run_details_widget.show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

## Best Model

In [99]:
# Select the child run that scored best on the primary metric configured for
# the HyperDrive run, and display it.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
mushroom,HD_308f174e-c143-4d9c-b50d-9cd09950c9c5_1,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


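Below are the hyperparameter values of the best model. The first four flag/value pairs are the baseline values set in the `ScriptRunConfig`; the last four are the values HyperDrive sampled for this run.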

In [106]:
# Command-line arguments the best run's training script was launched with.
best_run_details = best_run.get_details()
best_run_details['runDefinition']['arguments']

['--n_estimators',
 '100',
 '--max_depth',
 '6',
 '--criterion',
 'gini',
 '--min_samples_leaf',
 '1',
 '--n_estimators',
 '250',
 '--max_depth',
 '9',
 '--criterion',
 'entropy',
 '--min_samples_leaf',
 '3']

In [107]:
# Properties recorded for the best run.
best_run_properties = best_run.get_details()['properties']
best_run_properties

{'_azureml.ComputeTargetType': 'amlcompute',
 'ContentSnapshotId': '7b491e0d-dacc-4dcc-a673-bfffe1c9daf1',
 'ProcessInfoFile': 'azureml-logs/process_info.json',
 'ProcessStatusFile': 'azureml-logs/process_status.json'}

In [108]:
# Download the model file written by the best run into a local `models/`
# folder. The folder is created first if it does not exist yet.
# `os` must be imported in the notebook's import cell; without it this cell
# raises NameError on a fresh kernel.
os.makedirs('models', exist_ok=True)
best_run.download_file('outputs/model.pkl', 'models/random_forest.pkl')

## Model Deployment

### Register the model

In [109]:
# Register the downloaded model file in the workspace under the name
# 'rf_mushroom'.
model = Model.register(
    workspace=ws,
    model_path='models/random_forest.pkl',
    model_name='rf_mushroom',
)

Registering model rf_mushroom


### Configuration
We use the script [score.py](https://github.com/sannif/udacity_capstone_project/blob/68a36537213552cc3147d761afa51fb16cd5c869/scripts/score.py) for the inference.

In [116]:
# Entry script and environment used to serve the model. The environment is the
# one fetched for training, with its inferencing stack version set to 'latest'.
sklearn_env.inferencing_stack_version = 'latest'
inference_config = InferenceConfig(
    entry_script="score.py",
    source_directory="scripts/",
    environment=sklearn_env,
)

# Resources requested for the ACI web service; key authentication is enabled.
deployment_config = AciWebservice.deploy_configuration(
    cpu_cores=0.5,
    memory_gb=1,
    auth_enabled=True,
)

### Deployment

In [117]:
# Deploy the registered model as the web service "mushroom-service",
# overwriting any existing service with that name, and wait for the
# deployment to finish.
service = Model.deploy(
    workspace=ws,
    name="mushroom-service",
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config,
    overwrite=True,
)
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-07-07 19:09:01+00:00 Creating Container Registry if not exists.
2021-07-07 19:09:01+00:00 Registering the environment.
2021-07-07 19:09:01+00:00 Use the existing image.
2021-07-07 19:09:01+00:00 Generating deployment configuration.
2021-07-07 19:09:02+00:00 Submitting deployment to compute.
2021-07-07 19:09:04+00:00 Checking the status of deployment mushroom-service..
2021-07-07 19:14:28+00:00 Checking the status of inference endpoint mushroom-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [121]:
# Endpoint of the deployed service and the key used to authenticate against it.
scoring_uri = service.scoring_uri
key, _ = service.get_keys()

# JSON payload, authenticated with the service key as a bearer token.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {key}",
}

# Two mushrooms to classify, each described by its categorical features.
first_mushroom = {
    'cap-shape': 'x',
    'cap-surface': 's',
    'cap-color': 'n',
    'bruises': 't',
    'odor': 'p',
    'gill-attachment': 'f',
    'gill-spacing': 'c',
    'gill-size': 'n',
    'gill-color': 'k',
    'stalk-shape': 'e',
    'stalk-root': 'e',
    'stalk-surface-above-ring': 's',
    'stalk-surface-below-ring': 's',
    'stalk-color-above-ring': 'w',
    'stalk-color-below-ring': 'w',
    'veil-color': 'w',
    'ring-number': 'o',
    'ring-type': 'p',
    'spore-print-color': 'k',
    'population': 's',
    'habitat': 'u',
}
second_mushroom = {
    'cap-shape': 'x',
    'cap-surface': 's',
    'cap-color': 'g',
    'bruises': 'f',
    'odor': 'n',
    'gill-attachment': 'a',
    'gill-spacing': 'w',
    'gill-size': 'b',
    'gill-color': 'k',
    'stalk-shape': 't',
    'stalk-root': 'e',
    'stalk-surface-above-ring': 's',
    'stalk-surface-below-ring': 's',
    'stalk-color-above-ring': 'w',
    'stalk-color-below-ring': 'w',
    'veil-color': 'w',
    'ring-number': 'o',
    'ring-type': 'e',
    'spore-print-color': 'n',
    'population': 'a',
    'habitat': 'g',
}
data = json.dumps({'data': [first_mushroom, second_mushroom]})

# Send the request and show the raw response body.
resp = requests.post(scoring_uri, data=data, headers=headers)
print(resp.text)

["p", "e"]


We enable Application Insights on the service and print the service logs.

In [120]:
# Turn on Application Insights for the service, then fetch its logs.
service.update(enable_app_insights=True)
logs = service.get_logs()

# Print the log text one line at a time.
print(*logs.split('\n'), sep='\n')

2021-07-07T19:21:34,537289200+00:00 - iot-server/run 
2021-07-07T19:21:34,552071200+00:00 - nginx/run 
2021-07-07T19:21:34,553826600+00:00 - rsyslog/run 
2021-07-07T19:21:34,570768300+00:00 - gunicorn/run 
File not found: /var/azureml-app/.
Starting HTTP server
EdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...
2021-07-07T19:21:35,061704100+00:00 - iot-server/finish 1 0
2021-07-07T19:21:35,064355700+00:00 - Exit code 1 is normal. Not restarting iot-server.
Starting gunicorn 20.1.0
Listening at: http://127.0.0.1:31311 (63)
Using worker: sync
worker timeout is set to 300
Booting worker with pid: 87
SPARK_HOME not set. Skipping PySpark Initialization.
Initializing logger
2021-07-07 19:21:36,689 | root | INFO | Starting up app insights client
logging socket was found. logging is available.
logging socket was found. logging is available.
2021-07-07 19:21:36,690 | root | INFO | Starting up request id generator
2021-07-07 19:21:36,691 | root | INFO | Starting up app in

## Cleaning resources

In [123]:
# Delete the deployed web service now that it is no longer needed.
service.delete()

In [122]:
# Delete the compute target used for the HyperDrive runs.
compute_target.delete()