# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import Environment
from azureml.core.dataset import Dataset
from azureml.widgets import RunDetails

from train import clean_data
import os
import pandas as pd
from train import clean_data
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pickle
import joblib

from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
from azureml.core import ScriptRunConfig

## Dataset

TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [2]:
#ws = Workspace.from_config()
ws = Workspace.get(name="quick-starts-ws-132278")

experiment_name = 'hyperdrive'

experiment=Experiment(ws, experiment_name)

In [3]:
#  Create compute cluster

cluster_name = "cpu-cluster-1"
try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', 
                                                           max_nodes=4)

    # create the cluster
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

# can poll for a minimum number of nodes and for a specific timeout. 
# if no min node count is provided it uses the scale settings for the cluster
compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

# use get_status() to get a detailed status for the current cluster. 
print(compute_target.get_status().serialize())

Found existing compute target
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2020-12-29T08:30:38.244000+00:00', 'errors': None, 'creationTime': '2020-12-29T07:59:42.632975+00:00', 'modifiedTime': '2020-12-29T07:59:58.474047+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


## Hyperdrive Configuration

TODO: Explain the model you are using and the reason for chosing the different hyperparameters, termination policy and config settings.

In this project, we use Logistic regression, which is a linear classification model. 
- Hyperparameters: 
    - The inverse of regularization strength (C).  Smaller values of C implies more regularization and less model overfitting to training dataset. 
        - More values between 0.1 & 1.0 and fewer values less than 0.1 and greater than 1.0 were chosen
    - Max number of iterations (max_iter) allowed for solver convergence during model training.
        - Few values greater than default setting of 100 were chosen.       
- Early Termination policy: 
    - Bandit policy terminates any run that doesn't fall within the slack factor or slack amount of the evaluation metric with respect to the best performing run. 
    - It therefore increase the computational efficiency of training. 
- Sampling policy:
    - Random sampling policy is chosen & a total of 15 runs from 30 total possible combinations of hyperparameters will be chosen & run by Hyperdrive.

In [4]:
%%writefile conda_dependencies_1.yml

dependencies:
- python=3.6.2
- scikit-learn
- pip:
  - azureml-defaults

Overwriting conda_dependencies_1.yml


In [5]:
# Env needed on ACI
sklearn_env = Environment.from_conda_specification(name = 'sklearn-env', file_path = './conda_dependencies_1.yml')

In [6]:
# TODO: Create an early termination policy. This is not required if you are using Bayesian sampling.
early_termination_policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

#TODO: Create the different params that you will be using during training
param_sampling = RandomParameterSampling( 
                    {
                    '--C': choice(0.001, 0.01, 0.1, 0.2, 0.3, 0.5, 0.7, 1.0, 10, 100),
                    '--max_iter': choice(100, 200, 300)
                    }
                )

#TODO: Create your estimator and hyperdrive config
#if "training" not in os.listdir():
#    os.mkdir("./training")
#import shutil
#project_folder = "./training"
#shutil.copy('train.py', project_folder)
project_folder = "./"
estimator = ScriptRunConfig(source_directory=project_folder,
                      script='train.py',
                      #arguments=['--C', '1.0', '--max_iter', 100],
                      compute_target=compute_target,
                      environment=sklearn_env)

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_run_config = HyperDriveConfig(run_config=estimator,
                                     hyperparameter_sampling=param_sampling, 
                                     policy=early_termination_policy,
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=15,
                                     max_concurrent_runs=4)

In [7]:
#TODO: Submit your experiment
hyperdrive_run = experiment.submit(config=hyperdrive_run_config)

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [8]:
RunDetails(hyperdrive_run).show()
hyperdrive_run.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_3662c5eb-a589-4899-9734-475bb5c399c7
Web View: https://ml.azure.com/experiments/hyperdrive/runs/HD_3662c5eb-a589-4899-9734-475bb5c399c7?wsid=/subscriptions/6b4af8be-9931-443e-90f6-c4c34a1f9737/resourcegroups/aml-quickstarts-132278/workspaces/quick-starts-ws-132278

Streaming azureml-logs/hyperdrive.txt

"<START>[2020-12-29T08:49:05.581214][API][INFO]Experiment created<END>\n""<START>[2020-12-29T08:49:06.730446][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2020-12-29T08:49:07.047317][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2020-12-29T08:49:06.8903204Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_3662c5eb-a589-4899-9734-475bb5c399c7
Web View: https://ml.azure.com/experiments/hyperdrive/runs/HD_3662c5eb-a589-4899-9734-475bb5c399c7?wsid=/subscriptions/6b4af8be-9931-4

{'runId': 'HD_3662c5eb-a589-4899-9734-475bb5c399c7',
 'target': 'cpu-cluster-1',
 'status': 'Completed',
 'startTimeUtc': '2020-12-29T08:49:05.36004Z',
 'endTimeUtc': '2020-12-29T09:00:38.482503Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'e257177e-9389-456b-aa36-3e53e0523037',
  'score': '0.757847533632287',
  'best_child_run_id': 'HD_3662c5eb-a589-4899-9734-475bb5c399c7_3',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg132278.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_3662c5eb-a589-4899-9734-475bb5c399c7/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=%2BFCa9qEblmHrkaMWBp%2Bi2tTNvv6EC%2FeE79W2vpEqpeE%3D&st=2020-12-29T08%3A50%3A51Z&se=2020-12-29T17%3A00%3A51Z&sp=r'}}

In [9]:
assert(hyperdrive_run.get_status() == "Completed")

## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [14]:
# Get your best run and save the model from that run.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
print("Best run details:", best_run.get_details())
print("Best run arguments:", best_run.get_details()['runDefinition']['arguments'])
print("Best run id:", best_run.id)
print("Best run accuracy:", best_run_metrics['Accuracy'])
print("Best run files:", best_run.get_file_names())


Best run details: {'runId': 'HD_3662c5eb-a589-4899-9734-475bb5c399c7_3', 'target': 'cpu-cluster-1', 'status': 'Completed', 'startTimeUtc': '2020-12-29T08:52:04.440447Z', 'endTimeUtc': '2020-12-29T08:54:47.993975Z', 'properties': {'_azureml.ComputeTargetType': 'amlcompute', 'ContentSnapshotId': 'e257177e-9389-456b-aa36-3e53e0523037', 'ProcessInfoFile': 'azureml-logs/process_info.json', 'ProcessStatusFile': 'azureml-logs/process_status.json'}, 'inputDatasets': [], 'outputDatasets': [], 'runDefinition': {'script': 'train.py', 'command': '', 'useAbsolutePath': False, 'arguments': ['--C', '0.1', '--max_iter', '200'], 'sourceDirectoryDataStore': None, 'framework': 'Python', 'communicator': 'None', 'target': 'cpu-cluster-1', 'dataReferences': {}, 'data': {}, 'outputData': {}, 'jobName': None, 'maxRunDurationSeconds': 2592000, 'nodeCount': 1, 'priority': None, 'credentialPassthrough': False, 'environment': {'name': 'sklearn-env', 'version': 'Autosave_2020-12-29T08:07:03Z_88d4692d', 'python': {

In [11]:
#TODO: Save the best model
model = best_run.register_model(model_name='hyperdrive', model_path='outputs/model-hyperdrive.joblib')
model.download(target_dir="outputs", exist_ok=True)

'outputs/model-hyperdrive.joblib'

In [12]:
# Evaluate on val set
data_file = "https://raw.githubusercontent.com/shbv/azure_ml/main/capstone/titanic_data.csv"
ds = Dataset.Tabular.from_delimited_files(data_file)

x, y = clean_data(ds)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)
model = joblib.load('outputs/model-hyperdrive.joblib')
print("Accuracy:", accuracy_score(y_test, model.predict(x_test)))
print("Classification report:")
print(classification_report(y_test, model.predict(x_test)))
print("Confusion matrix:")
print(confusion_matrix(y_test, model.predict(x_test)))

Accuracy: 0.757847533632287
Classification report:
              precision    recall  f1-score   support

           0       0.76      0.90      0.82       139
           1       0.76      0.52      0.62        84

    accuracy                           0.76       223
   macro avg       0.76      0.71      0.72       223
weighted avg       0.76      0.76      0.75       223

Confusion matrix:
[[125  14]
 [ 40  44]]


Trying to unpickle estimator LogisticRegression from version 0.23.2 when using version 0.22.2.post1. This might lead to breaking code or invalid results. Use at your own risk.


## Model Deployment

Remember you have to deploy only one of the two models you trained.. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [24]:
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import LocalWebservice, AciWebservice
from azureml.core import Model

# Create inference config and deployment config
# env = Environment.get(workspace=ws, name="AzureML-AutoML") # for auto_ml
env = sklearn_env # for hyperdrive
inference_config = InferenceConfig(entry_script='score.py',environment= env) 
deployment_config = AciWebservice.deploy_configuration(auth_enabled=True)

# Registered model hyperdrive is converted to model object
model = Model(ws, name='hyperdrive')  
# Deploy service using this model object
service = Model.deploy(workspace=ws, name='hyperdrive', models=[model], inference_config = inference_config, deployment_config = deployment_config)
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running.....................................
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [32]:
# Switch application insights on
service.update(enable_app_insights=True)
service.wait_for_deployment(show_output=True)

Healthy
http://f8d1e41d-c5d4-4c94-9e2d-28b0a90d2f61.southcentralus.azurecontainer.io/score
OcpXMJyELqbmAcJggQqegBSyeWacPxCA


In [34]:
# Check status
print(service.state)

# Get uri & keys
scoring_uri = service.scoring_uri
print(scoring_uri)
key, secondary_key = service.get_keys()
print(key)

http://f8d1e41d-c5d4-4c94-9e2d-28b0a90d2f61.southcentralus.azurecontainer.io/score
OcpXMJyELqbmAcJggQqegBSyeWacPxCA


TODO: In the cell below, send a request to the web service you deployed to test it.

In [30]:
import requests
import json

# URL for the web service, should be similar to:
#scoring_uri = ''
# If the service is authenticated, set the key or token
#key = ''

# 2 Data points to score, so we get 2 results back
data = {"data":
        [
          {
            "Pclass": 2,
            "Sex": 1,
            "Age": 40.0,
            "SibSp": 0,
            "Parch": 0,
            "Fare": 70,
            "Embarked": 1
          },
          {
            "Pclass": 1,
            "Sex": 0,
            "Age": 33.0,
            "SibSp": 0,
            "Parch": 0,
            "Fare": 70,
            "Embarked": 2
          }
        ]
    }
    
# Convert to JSON string
input_data = json.dumps(data)
with open("data.json", "w") as _f:
    _f.write(input_data)

# Set the content type
headers = {'Content-Type': 'application/json'}
# If authentication is enabled, set the authorization header
headers['Authorization'] = f'Bearer {key}'

# Make the request and display the response
resp = requests.post(scoring_uri, input_data, headers=headers)
print(resp.json())

[0, 1]


TODO: In the cell below, print the logs of the web service and delete the service

In [31]:
# Get logs
print(service.get_logs())

2020-12-29T10:13:47,811574900+00:00 - iot-server/run 
2020-12-29T10:13:47,918081100+00:00 - gunicorn/run 
2020-12-29T10:13:47,987940400+00:00 - rsyslog/run 
2020-12-29T10:13:48,010037700+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_59abd4256ad8e6688a4dc7593ce35cbc/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_59abd4256ad8e6688a4dc7593ce35cbc/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_59abd4256ad8e6688a4dc7593ce35cbc/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_59abd4256ad8e6688a4dc7593ce35cbc/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_59abd4256ad8e6688a4dc7593ce35cbc/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
EdgeHubC

In [None]:
# Delete service
service.delete()