# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project

In [None]:
import logging
import os
import shutil

import joblib

from azureml.core import Dataset, Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice
from azureml.widgets import RunDetails

from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice

## Dataset

TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external

In [None]:
# Load the workspace from the local configuration and open the experiment
# that every later cell submits to.
ws = Workspace.from_config()
experiment_name = 'hyperd-experiment'
exp = Experiment(workspace=ws, name=experiment_name)

# Fetch the dataset registered in the workspace under this name.
ds = Dataset.get_by_name(ws, 'heart-failure-clinical-records')

# Print a short workspace summary, one attribute per line.
print('\n'.join([
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]))

# Start an interactive logging run in the experiment; the handle is kept in `run`.
run = exp.start_logging()

## Config Compute Cluster
Create a compute target for the HyperDrive run

In [None]:
# Name of the CPU cluster used for the HyperDrive run.
hd_compute_cluster_name = "hyper-drive"

# Reuse the cluster when it can be looked up in the workspace; otherwise
# (ComputeTargetException) provision a new one with up to 4 nodes.
try:
    compute_target = ComputeTarget(workspace=ws, name=hd_compute_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, hd_compute_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the compute target is ready, showing progress output.
compute_target.wait_for_completion(show_output=True)

## Hyperdrive Configuration

Hyperparameter tuning is the process of finding the configuration of hyperparameters that results in the best performance. Random sampling supports early termination of low-performance runs.
The early termination policy uses the primary metric to identify low-performance runs.
BanditPolicy terminates runs where the primary metric is not within the specified slack factor/slack amount compared to the best performing run.

In [None]:
# Early termination policy: BanditPolicy with a slack factor of 0.1,
# applied at an evaluation interval of 2.
early_termination_policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Make sure the ./training directory exists. exist_ok=True replaces the
# previous "list the directory, then mkdir" check, which could race with a
# concurrent creation and fail with FileExistsError.
os.makedirs("./training", exist_ok=True)

# Random sampling over the two arguments handed to the training script:
# --C is drawn uniformly from [0.0, 1.0], --max_iter from a fixed set of values.
param_sampling = RandomParameterSampling(
    {
        '--C': uniform(0.0, 1.0),
        '--max_iter': choice(50, 100, 150, 200, 250),
    }
)

# Estimator that runs train.py from the current directory on the compute target.
estimator = SKLearn(
    source_directory="./",
    compute_target=compute_target,
    vm_size='STANDARD_D2_V2',
    entry_script="train.py",
)

# HyperDrive configuration: maximise the 'Accuracy' metric over at most
# 20 runs in total, with up to 4 running concurrently.
hyperdrive_run_config = HyperDriveConfig(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,
    max_concurrent_runs=4,
)

In [None]:
# Submit the HyperDrive configuration to the experiment; the returned run
# handle is kept in `hyperdrive_run` and used by the cells below.
hyperdrive_run = exp.submit(hyperdrive_run_config, show_output=True)

## Run Details

TODO: In the cell below, use `RunDetails` widget to show the different experiments.

In [None]:
# Show the RunDetails widget for the HyperDrive run.
RunDetails(hyperdrive_run).show()

In [None]:
# Query the current status of the HyperDrive run (displayed as the cell output).
hyperdrive_run.get_status()

In [None]:
# Block until the HyperDrive run has completed, with output shown.
hyperdrive_run.wait_for_completion(show_output=True)

## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [None]:
# Pick the child run with the best primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # No child run reported the primary metric; fail with a clear message
    # instead of an AttributeError on the lines below.
    raise RuntimeError(
        "HyperDrive run has no child run with the primary metric logged; "
        "check that the training script logs 'Accuracy'."
    )

# Metrics logged by the best run and the arguments it was launched with.
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['arguments']

print('Best Run ID: ', best_run.id)
print('\n Metrics: ', best_run_metrics)
print('\n Parameters: ', parameter_values)
print('\n Accuracy: ', best_run_metrics['Accuracy'])

In [None]:
# Display the best run (rendered as the cell output).
best_run

In [None]:
# Save the best run's model locally as hyperdrive_model.joblib.
# The artifact name is relative to the run ('outputs/model.joblib', without a
# leading slash) and matches the path used when the model is registered.
best_run.download_file('outputs/model.joblib', 'hyperdrive_model.joblib')

In [None]:
# Register the model file produced by the best run in the workspace.
best_model_registered = best_run.register_model(
    model_name='best_hyperdrive_model',
    model_path='outputs/model.joblib',
    description='best hyperdrive model',
)

## Model Deployment

Register the model, create an inference config and deploy the model as a web service

In [None]:
# Register the model that will be deployed. The model file lives in the
# outputs of the best child run, not of the parent HyperDrive run, so it is
# registered from `best_run` with an explicit model path.
model_name = 'best_hyperdrive_model'
model = best_run.register_model(
    model_name=model_name,
    model_path='outputs/model.joblib',
    description='HyperDrive model',
)

In [None]:
# Build the inference configuration from an entry script and an environment.
# NOTE(review): `script_file` and `env` are not defined anywhere in the visible
# notebook; presumably the scoring script path and an Environment object.
# They must be defined before this cell can run; confirm the intended values.
inference_config = InferenceConfig(entry_script = script_file, environment = env)

In [None]:
# ACI deployment configuration: 1 CPU core and 1 GB of memory.
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

# Name under which the web service is deployed.
service_name = 'hyperdrive-heart-failure'

print(service_name)

Send a request to the web service to test it.

In [None]:
# Execute endpoint.py via the IPython %run magic.
# NOTE(review): in file order this cell comes before the cell that deploys the
# service; presumably endpoint.py needs the deployed service. Confirm the
# intended execution order.
%run endpoint.py

Deploy the model as a web service, print its state and scoring URI, and then delete the service

In [None]:
# Deploy the registered model as a web service, using the inference
# configuration and the ACI deployment configuration.
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_config,
)

# Wait for the deployment to finish, with output shown, then report the result.
service.wait_for_deployment(show_output=True)
print("State: " + service.state)
print("Scoring URI: " + service.scoring_uri)

In [None]:
# Delete the deployed web service.
service.delete()