# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
import json
import os
#import pickle

import joblib
import numpy as np
import pandas as pd
import requests

from azureml.core import Workspace, Experiment, Dataset, Datastore, Model
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice, Webservice
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import GridParameterSampling, RandomParameterSampling
from azureml.train.sklearn import SKLearn
from azureml.widgets import RunDetails

In [2]:
# Load the workspace from the local configuration and bind the experiment
# handle that the HyperDrive run is submitted to further down.
ws = Workspace.from_config()
experiment_name = 'house-price'
experiment1 = Experiment(workspace=ws, name=experiment_name)

In [3]:
# Make sure the local working folders exist. `exist_ok=True` keeps the cell
# idempotent, so re-running it (or Restart & Run All) never raises because a
# folder is already there.
for folder in ("training", "hyper"):
    os.makedirs(folder, exist_ok=True)

## Create or use a compute target

In [4]:
compute_name = "nuria-p3"
try:
    # Reuse the cluster when one with this name already exists in the workspace.
    compute_target = ComputeTarget(workspace=ws, name=compute_name)
    print('Using existing compute target.')
except ComputeTargetException:
    # Lookup failed: provision a new AmlCompute cluster under the same name.
    print('Creating compute target.')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_D12_V2',
        min_nodes=1,
        max_nodes=5,
    )
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

# Wait for provisioning to finish and stream its status into the cell output.
compute_target.wait_for_completion(show_output=True)

Using existing compute target.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Dataset

TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [5]:
# Fetch the California housing data and derive the train / test datasets.
# `create_datasets` comes from the project's train.py (not shown here); it
# returns the four arrays unpacked below.
from sklearn.datasets import fetch_california_housing
from train import create_datasets

features, target = fetch_california_housing(return_X_y=True)
x_train, x_test, y_train, y_test = create_datasets(features, target)

In [None]:
#datastore=ws.get_default_datastore()  
# data = pd.concat([x_train,y_train], axis=1)

## Hyperdrive Configuration

TODO: Explain the model you are using and the reason for choosing the different hyperparameters, termination policy and config settings.

In [6]:
# TODO: Create an early termination policy. This is not required if you are using Bayesian sampling.
# Bandit policy: evaluated at every interval, starting after the first five.
early_termination_policy = BanditPolicy(
    evaluation_interval=1,
    slack_factor=0.2,
    delay_evaluation=5,
)

In [8]:
# NOTE(review): this cell was labelled "for logistic", but the tuned metric is
# reported as an R2 score later in the notebook — confirm the model type
# against train.py.
# Random search: every hyperparameter is drawn from a uniform range.
search_space = {
    "alpha": uniform(0.0001, 0.01),
    "l1_ratio": uniform(0, 1),
    "eta0": uniform(0.1, 0.9),
    "power_t": uniform(0.01, 0.99),
}
param_sampling = RandomParameterSampling(search_space)

In [9]:
#TODO: Create your estimator and hyperdrive config
# The training entry point is train.py in the notebook's own directory; the
# run is sent to the cluster identified by `compute_name`.
# NOTE: the SDK flags the SKLearn estimator as deprecated in favour of
# ScriptRunConfig (see the warning printed by this cell).
estimator = SKLearn(
    entry_script='./train.py',
    source_directory='.',
    compute_target=compute_name,
)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [11]:
# Combine estimator, search space and early-termination policy into the sweep
# definition. The primary metric is logged under the name "Accuracy", although
# the rest of the notebook reports that value as the R2 score.
hyperdrive_run_config = HyperDriveConfig(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=50,
    max_concurrent_runs=4,
)

In [12]:
#TODO: Submit your experiment
# Launch the sweep; the returned run object is used for monitoring below.
hyperdrive_run = experiment1.submit(config=hyperdrive_run_config)



## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [13]:
# Show the live monitoring widget, then wait for the sweep to finish; the
# value returned by wait_for_completion is displayed as the cell output.
run_widget = RunDetails(hyperdrive_run)
run_widget.show()
hyperdrive_run.wait_for_completion(show_output=False)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

{'runId': 'HD_37e860ba-3a65-4702-9cb5-efb747471b29',
 'target': 'nuria-p3',
 'status': 'Completed',
 'startTimeUtc': '2021-01-22T16:43:22.26564Z',
 'endTimeUtc': '2021-01-22T17:16:53.520713Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '8d47975d-f428-4787-98ca-e98aa1e53b6b',
  'score': '0.6116870558605988',
  'best_child_run_id': 'HD_37e860ba-3a65-4702-9cb5-efb747471b29_9',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg135708.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_37e860ba-3a65-4702-9cb5-efb747471b29/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=FootI2hESR0ixlM3UBmQyfQHvvLxpVNZ60SAHOSnMHI%3D&st=2021-01-22T17%3A07%3A50Z&se=2021-01-23T01%3A17%3A50Z&sp=r'}}

## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [14]:
# Pick the child run with the best primary metric and keep both its logged
# metrics and the argument list it was launched with.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # No child run reported the primary metric; fail with a clear message
    # instead of an AttributeError on the next line.
    raise RuntimeError(
        "HyperDrive returned no best run: no child run logged the primary metric 'Accuracy'."
    )
best_run_metrics = best_run.get_metrics()
parameters = best_run.get_details()['runDefinition']['arguments']
# Persist the winning run's argument list (not a fitted estimator).
joblib.dump(parameters, filename='hyper/best-hyperdrive.joblib')

print('Best Run Id: ', best_run.id)
# The value logged as 'Accuracy' is reported here as the R2 score.
print('\n R2:', best_run_metrics['Accuracy'])


Best Run Id:  HD_37e860ba-3a65-4702-9cb5-efb747471b29_9

 R2: 0.6116870558605988


In [15]:
# Collect the metrics logged by every child run, keyed by child run id.
# (The earlier get_children_sorted_by_primary_metric() call was dropped: its
# return value was discarded, so it only cost a service round trip.)
result = hyperdrive_run.get_metrics()

In [17]:
# Build one table row per child run from the HyperDrive metrics dictionary
# (`result` maps child run id -> metrics logged by that run).
colum = ['Iteration', 'R2 Score', 'Alpha', 'L1 Ratio', 'Eta0', 'Power t']
# Metric names as logged by the runs, in the same order as the value columns
# above (the R2 score is logged under the name 'Accuracy').
metric_keys = ['Accuracy', 'Alpha', 'L1 Ratio', 'Eta0', 'Power t']
records = [
    [run_id] + [run_metrics[key] for key in metric_keys]
    for run_id, run_metrics in result.items()
]
# Constructing the frame from records keeps the metric columns numeric.
# np.column_stack coerced every value to a string, which made the sort below
# lexicographic (wrong for negative or exponent-formatted scores).
out = pd.DataFrame(records, columns=colum)
# sort_values(inplace=True) returns None, so sort into a new frame instead;
# `out` keeps the original run order and `out_sorted` is the ranked view.
out_sorted = out.sort_values(by='R2 Score', ascending=False, na_position='last')
out_sorted.head(10)

Unnamed: 0,Iteration,R2 Score,Alpha,L1 Ratio,Eta0,Power t
49,HD_37e860ba-3a65-4702-9cb5-efb747471b29_9,0.6116870558605988,0.0007986619990074,0.5235010292764468,0.2867176541015094,0.669642173252586
10,HD_37e860ba-3a65-4702-9cb5-efb747471b29_18,0.6116514613371163,0.0013435675407817,0.6335809013124988,0.7159402483560853,0.5065135197304633
27,HD_37e860ba-3a65-4702-9cb5-efb747471b29_33,0.6114934608628351,0.0021169702871606,0.5821851303976797,0.2280471886581596,0.5307450909605927
22,HD_37e860ba-3a65-4702-9cb5-efb747471b29_29,0.6114526624730892,0.0021716872209849,0.0565144035109714,0.8210787521796338,0.5920671303281351
23,HD_37e860ba-3a65-4702-9cb5-efb747471b29_3,0.611394464036056,0.0026094354318092,0.6263225442991867,0.5644601152506094,0.7282752340134538
15,HD_37e860ba-3a65-4702-9cb5-efb747471b29_22,0.6109420624216573,0.004520856229108,0.3004015107599511,0.5934869664939394,0.7986519888838047
8,HD_37e860ba-3a65-4702-9cb5-efb747471b29_16,0.6108558373750839,0.0043389959515811,0.7472021234254752,0.6019918164055924,0.7363143757216521
11,HD_37e860ba-3a65-4702-9cb5-efb747471b29_19,0.6108449379366385,0.0035007556545572,0.2243135966675681,0.5595019464452224,0.431534657753575
28,HD_37e860ba-3a65-4702-9cb5-efb747471b29_34,0.6107901468316861,0.004810219918005,0.4945222868865082,0.2750095779218333,0.6683804845742485
45,HD_37e860ba-3a65-4702-9cb5-efb747471b29_5,0.6107176560781946,0.0037838385921842,0.8950439002084721,0.4901456053588008,0.8346941444397714


In [18]:
#TODO: Save the best model
# NOTE(review): `parameters` is the best run's argument list (read from its
# run definition), so this file stores those arguments rather than a fitted
# model object. The return value of joblib.dump is shown as the cell output.
joblib.dump(parameters, filename='hyper/best-hyperdrive.joblib')  

['hyper/best-hyperdrive.joblib']

In [19]:
# Register the model
#model = best_run.register_model(model_name='best-hyperdrive', model_path='hyper/best-hyperdrive.joblib')
# NOTE(review): model_path='.' presumably registers everything under the
# run's root rather than a single model file — confirm this is intended.
model = best_run.register_model(model_name='best-hyperdrive', model_path='.')

## Since the AutoML experiment produced a better model, I will not deploy this one. This exercise ends here.