# Hyperparameter Tuning using HyperDrive

Import all the dependencies that you will need to complete the project.

In [None]:
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

In [None]:
# `os` is otherwise first imported in a later cell; import it here so this
# cell runs on a fresh kernel (Restart & Run All).
import os

# Show the notebook's working directory and list its contents.
currDir = os.getcwd()
print(currDir)
os.listdir(currDir)

## Dataset

This section sets up access to the data used in this project. The dataset is external.

### Connect to a workspace

In [None]:
# Connect to the workspace via `Workspace.from_config()`.
ws = Workspace.from_config()

# Report the workspace's identifying attributes, one per line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

### Create an Azure ML experiment

In [None]:
# Name under which the HyperDrive runs are grouped, and a project folder path.
experiment_name = 'hdr_heart_failure_experiment'
project_folder = './hyperdrive-model'

# Experiment handle in workspace `ws`.
experiment = Experiment(workspace=ws, name=experiment_name)

# Start an interactive logging run on the experiment.
# NOTE(review): `run` is not referenced again in the visible cells — confirm
# it is needed and that it gets completed somewhere.
run = experiment.start_logging()

### Create or Attach a Compute Resource

In [None]:
# Reuse the named CPU cluster if it exists; otherwise provision a new one
# with VM size 'Standard_D2_V2' and at most 6 nodes.

cluster_name = "my-cpu-cluster"

try:
    # Look up a compute target with this name in the workspace
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # Lookup failed: provision a new cluster
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_D2_V2',
        max_nodes=6,
    )
    compute_target = ComputeTarget.create(
        workspace=ws,
        name=cluster_name,
        provisioning_configuration=compute_config,
    )
else:
    print('Found existing compute target, use it')

# Block until the compute target reports completion, streaming progress output
compute_target.wait_for_completion(show_output=True)

# Print the serialized detailed status of the cluster
cluster_status = compute_target.get_status()
print(cluster_status.serialize())



## Hyperdrive Configuration

TODO: Explain the model you are using and the reasons for choosing the different hyperparameters, termination policy and config settings.

In [None]:
from azureml.core import ScriptRunConfig
from azureml.core.environment import Environment
from azureml.widgets import RunDetails

from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import os
import shutil


# Early termination policy: Bandit policy with evaluation_interval=2 and
# slack_factor=0.1.
early_termination_policy = BanditPolicy(slack_factor=0.1, evaluation_interval=2)

# Hyperparameter search space: random sampling over discrete choices for the
# `--C` and `--max_iter` arguments passed to the training script.
search_space = {
    '--C': choice(0.001, 0.1, 0.5, 1.0, 2.0, 3.0, 5.0),
    '--max_iter': choice(50, 100, 150, 200),
}
param_sampling = RandomParameterSampling(search_space)

# Stage train.py in a dedicated folder; this folder is what gets passed to the
# estimator as its source directory.
script_dir = "./training"

# exist_ok=True keeps the cell re-runnable and avoids the separate
# "check, then create" step.
os.makedirs(script_dir, exist_ok=True)

# Copy (and overwrite on re-run) the training script into the staging folder.
shutil.copy('train.py', script_dir)


# SKLearn estimator that runs train.py from the staged folder on the compute target
estimator = SKLearn(
    source_directory=script_dir,
    entry_script='train.py',
    compute_target=compute_target,
)

# HyperDrive sweep settings: maximize the 'Accuracy' metric over at most
# 24 runs in total, with up to 4 running concurrently.
# presumably train.py logs a metric named 'Accuracy' — verify against the script.
hyperdrive_settings = dict(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=24,
    max_concurrent_runs=4,
)
hyperdrive_run_config = HyperDriveConfig(**hyperdrive_settings)

In [None]:
# Submit the HyperDrive configuration to the experiment.
# The Experiment object created earlier is bound to `experiment`; no `exp`
# name is defined in the visible cells, so using it raises NameError.

hyperdrive_run = experiment.submit(config=hyperdrive_run_config, show_output=True)

## Run Details


Use the `RunDetails` widget to monitor the runs launched by the HyperDrive experiment.

In [None]:
# Show the monitoring widget for the HyperDrive run, then block until the
# run finishes while streaming its output.
run_details_widget = RunDetails(hyperdrive_run)
run_details_widget.show()
hyperdrive_run.wait_for_completion(show_output=True)

In [None]:
# Show the current status of the HyperDrive run as the cell output.
hyperdrive_run.get_status()

In [None]:
# Display the HyperDrive run object as the cell output.
hyperdrive_run

## Best Model

Get the best model from the hyperdrive experiments and display all the properties of the model.

In [None]:
# Get your best run

best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # Fail with a clear message instead of an AttributeError on the next line.
    raise RuntimeError(
        "No best run found: no child run reported the primary metric."
    )

best_run_metrics = best_run.get_metrics()

# Fetch the run details once and reuse them for the parameter lookup.
best_run_details = best_run.get_details()
parameter_values = best_run_details['runDefinition']['arguments']
best_run_files = best_run.get_file_names()

print('Best Run ID',best_run.id)
print('\n Metrics: ', best_run_metrics)
print('\n Parameters: ', parameter_values,sep='\n')
print('\nAccuracy of Best run',best_run_metrics['Accuracy'],sep='\n')
print('\nBest run file names',best_run_files,sep='\n')

In [None]:
# Save the best model

import joblib

# Download the trained model from the best run to a local file.
# Run artifact names are relative (no leading slash).
# presumably train.py writes the model to outputs/model.joblib, the same path
# the registration cell uses — verify against the training script.
# Registration is not done here: 'hyperdrive_model.joblib' is only the local
# download name, not an artifact of the run, and the model is registered in
# the "Register the best model" cell.
best_run.download_file(name='outputs/model.joblib',
                       output_file_path='hyperdrive_model.joblib')

In [None]:
# Register the best run's model file under the name 'best_hyperdrive_model'.
model = best_run.register_model(
    model_path='outputs/model.joblib',
    model_name='best_hyperdrive_model',
)

## Model Deployment

Remember that you have to deploy only one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

TODO: In the cell below, send a request to the web service you deployed to test it.

TODO: In the cell below, print the logs of the web service and delete the service