# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
from azureml.core import Workspace, Experiment,ScriptRunConfig
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.exceptions import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform,normal,choice
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core.resource_configuration import ResourceConfiguration
import os
import joblib
import requests
import json
import sklearn


In [2]:
ws = Workspace.from_config()

experiment_name = 'capstone_project_hyperdrive'

experiment=Experiment(ws, experiment_name)


In [3]:
cpu_cluster_name = "cpu-cluster"

try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print("Found existing cpu-cluster")
except ComputeTargetException:
    print("Creating new cpu-cluster")
    
    compute_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2",
                                                           min_nodes=0,
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
    
compute_target.wait_for_completion(show_output=True)

Found existing cpu-cluster
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Prepare Data

In [4]:
remote_dataset = TabularDatasetFactory.from_delimited_files("https://www.openml.org/data/get_csv/16826755/phpMYEkMl")
ds = remote_dataset.to_pandas_dataframe()

if "data" not in os.listdir():
    os.mkdir("./data")

if not os.path.isfile("data/titanic.csv"):
    ds.to_csv('data/titanic.csv',index = False)

## Hyperdrive Configuration

In this expiremnt we are using HyperDrive which helping us to cover a range of hyperparameters to find the best combination of parameteres to acheive the goal which in our case is Maximizing the Accuracy

to detirmine the hyperparametrs what we need to pass to the model and the range of values to cover we are using RandomParameterSampling, which takes the max number of iteration(--max_iter) as a chice of enumeration and the Regularization Strength (--c) as a value between .1 and 1

Another argument that we pass to the hyperdriveconfig is the stopping policy, we are using BanditPolicy, in our case each run which is less than 95% of the best performing run will be terminted, this will eliminate runs that get rsults we don't need.

There is the main argument which is the estimator which is your algorithm that you will apply, we are using SKLearn, this estimator takes the train.py which is the script file that contains your custome code.

The custome code in the train.py using the sklearn LogisticRegression and a method for cleaning the the data, splitting the data to training and testing set.

In [5]:
# TODO: Create an early termination policy. This is not required if you are using Bayesian sampling.
early_termination_policy = BanditPolicy(slack_factor = 0.05, evaluation_interval=3)  

#TODO: Create the different params that you will be using during training
param_sampling = RandomParameterSampling({
        "--max_iter": choice(50, 75),
        "--C": uniform(.4, .6)
       
    })

#TODO: Create your estimator and hyperdrive config
estimator = SKLearn(source_directory= './',entry_script='train.py',
compute_target = compute_target)

hyperdrive_run_config = HyperDriveConfig(estimator = estimator,
                                hyperparameter_sampling=param_sampling,
                                policy=early_termination_policy,
                                primary_metric_name='Accuracy',
                                primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                max_total_runs=12,
                                max_concurrent_runs = 4     
                                )

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [6]:
#TODO: Submit your experiment
hyperdrive_run = experiment.submit(hyperdrive_run_config,show_output=True)



In [7]:
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [8]:
hyperdrive_best_run = hyperdrive_run.get_best_run_by_primary_metric()

In [9]:
hyperdrive_best_run

Experiment,Id,Type,Status,Details Page,Docs Page
capstone_project_hyperdrive,HD_07da7aac-ead4-4a7f-b93f-aff4a9b06a99_2,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [10]:
hyperdrive_best_run.download_file('outputs/model.pkl', output_file_path='./outputs/model.pkl')

In [8]:
model = Model.register(workspace=ws,
                       model_name='capstone_hyperdrive_best_model',          
                       model_path='./outputs/model.pkl', 
                       model_framework=Model.Framework.SCIKITLEARN,
                       model_framework_version=sklearn.__version__)

Registering model capstone_hyperdrive_best_model


## Web Service Deployment

In [9]:
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

myenv = Environment("myenv")
conda_dep = CondaDependencies()


# # Define the packages needed by the model and scripts
conda_dep.add_conda_package("numpy")
conda_dep.add_conda_package("scikit-learn")
# # You must list azureml-defaults as a pip dependency
conda_dep.add_pip_package("azureml-defaults")
conda_dep.add_pip_package("azureml-core")
conda_dep.add_pip_package("joblib")

# # Adds dependencies to PythonSection of myenv
myenv.python.conda_dependencies=conda_dep

myenv.register(workspace=ws)

myenv = Environment.get(workspace=ws, name="myenv")

In [10]:
inference_config = InferenceConfig(entry_script="score.py",
                                   environment=myenv)

deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1,
                                                       memory_gb = 1,
                                                       auth_enabled=True,
                                                       enable_app_insights=True)



In [11]:
service_name = 'hyperdrivewebservice4'
service = Model.deploy(ws, service_name, [model],inference_config=inference_config,deployment_config=deployment_config)
service.wait_for_deployment(True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running.......................
Succeeded
ACI service creation operation finished, operation "Succeeded"


### Test the deployed service

In [12]:
data = {
  "data": [
    {
                  "pclass": 1,
                  "sex": 1,
                  "age": 20,
                  "sibsp": 1,
                  "parch": 1,
                  "embarked": 2
    }
  ],
  "method": "predict"
}

In [13]:
input_data = json.dumps(data)

primaryKey, secondaryKey = service.get_keys()

scoring_uri = service.scoring_uri

# Set the content type
headers = {'Content-Type': 'application/json'}
# If authentication is enabled, set the authorization header
headers['Authorization'] = f'Bearer {primaryKey}'

# Make the request and display the response
resp = requests.post(scoring_uri, input_data,headers=headers)

print(resp.json())

[0]


## Clear Resources

In [None]:
# Delete the web service
service.delete()

In [None]:
# Delete Compute Resources

try:
    compute_target.delete()
except ComputeTargetException:
    print("cpu-cluster Not Found")