# Hyperparameter Tuning using HyperDrive

## Import libraries for Azure Machine Learning SDK

In [None]:
import os
import shutil
import numpy as np 
import pandas as pd 
import azureml.core
import pkg_resources
import joblib
import requests
import json

from matplotlib import pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.widgets import RunDetails

from azureml.core import Environment, Workspace, Experiment, ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.model import Model, InferenceConfig
from azureml.core.webservice import Webservice, AciWebservice

from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.policy import BanditPolicy, MedianStoppingPolicy
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.parameter_expressions import uniform, choice

# Report which release of the Azure ML core SDK this kernel is using
print(f"SDK version: {azureml.core.VERSION}")

## Dataset
### Initialize Workspace
Initialize a workspace object from persisted configuration. Make sure the config file is present at .\config.json

In [None]:
# Load the workspace described by the persisted config.json
ws = Workspace.from_config()

# One "label:<TAB>value" line per workspace attribute
workspace_summary = [
    'Workspace name:\t' + ws.name,
    'Resource group:\t' + ws.resource_group,
    'Azure region:\t' + ws.location,
    'Subscription id:\t' + ws.subscription_id,
]
print('\n'.join(workspace_summary))

### Create an Azure ML experiment

In [None]:
# Name for experiment
experiment_name = 'hyperdrive-heart-failure-experiment'

# Handle to the experiment in the workspace.
# No interactive run is opened with start_logging() here: nothing in this
# notebook logs to such a run or completes it, so it would only linger in the
# "Running" state. The HyperDrive run is created later by experiment.submit().
experiment = Experiment(ws, experiment_name)

experiment

### Create or Attach an AmlCompute Target
We need a compute target for our HyperDrive run. We use ***vm_size = Standard_DS3_v2*** in the provisioning configuration and limit ***max_nodes*** to 4.

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name for the CPU cluster
amlcompute_cluster_name = "hyperdrive-cpu-compute-cluster"

# Reuse the cluster when the workspace already has it; provision it otherwise
try:
    amlcompute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    amlcompute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_DS3_v2',
        max_nodes=4,
    )
    amlcompute_target = ComputeTarget.create(ws, amlcompute_cluster_name, amlcompute_config)
else:
    print('Found existing cluster, use it.')

# Wait for the compute target operation to complete, showing its progress output
amlcompute_target.wait_for_completion(show_output=True)

In [None]:
# Numbered listing (starting at 1) of every compute target in the workspace
compute_targets = ws.compute_targets

for position, target_name in enumerate(compute_targets, start=1):
    target = compute_targets[target_name]
    print(f"{position}. Compute target\n\tname: {target.name}\n\tType: {target.type}")

In [None]:
# get_status() gives a more detailed view of the current AmlCompute state;
# print its serialized form.
cluster_status = amlcompute_target.get_status()
print(cluster_status.serialize())

### Dataset

In [None]:
from azureml.core.dataset import Dataset

# Registered name of the dataset in the workspace and its description
key = "HealthFailure Dataset"
description_text = "Health Failure dataset from UCI ML-Repository for mortality prediction for the Capstone Project."

dataset_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00519/heart_failure_clinical_records_dataset.csv"

# Reuse the dataset registered under `key`; otherwise build it from the CSV
# at `dataset_url` and register it in the workspace.
registered_datasets = ws.datasets
if key not in registered_datasets:
    dataset = Dataset.Tabular.from_delimited_files(dataset_url)
    dataset = dataset.register(workspace=ws, name=key, description=description_text)
else:
    dataset = registered_datasets[key]
    print("The Dataset was found!")

# Materialize the tabular dataset as a pandas DataFrame for local work
df = dataset.to_pandas_dataframe()

In [None]:
# Summary statistics of the loaded DataFrame
df.describe()

In [None]:
# First rows of the loaded DataFrame
df.head()

#### Prepare the datasets for the Automation

In [None]:
from sklearn.model_selection import train_test_split
from azureml.data.dataset_factory import TabularDatasetFactory

# Split the dataset into training and testing datasets
train_df, test_df = train_test_split(df, test_size=0.2, shuffle=True)

# Save training data to csv file. The folder is created first because
# DataFrame.to_csv does not create missing directories.
local_data_dir = "./data"
os.makedirs(local_data_dir, exist_ok=True)
train_df.to_csv(local_data_dir + "/train_data.csv", index=False)

# Upload the saved training data and create a dataset in Azure ML from it.
# overwrite=True replaces a CSV left by an earlier execution of this cell, so
# the uploaded file always matches the split that was just drawn.
data_store = ws.get_default_datastore()
data_store.upload(src_dir=local_data_dir, target_path="training_data", overwrite=True)
train_ds = TabularDatasetFactory.from_delimited_files(path=[(data_store, 'training_data/train_data.csv')])

#### Review the Training Dataset Result

In [None]:
# Preview the first 5 records of the uploaded training dataset as a DataFrame
train_ds.take(5).to_pandas_dataframe()

## Hyperdrive Configuration

TODO: Explain the model you are using and the reason for choosing the different hyperparameters, termination policy and config settings.

The pipeline used here consists of a custom scikit-learn logistic regression model, defined in the ***train.py*** script, and a HyperDrive run that iterates over the model's hyperparameters.
- Setting logistic regression parameters:
    - --C - Inverse of regularization strength
    - --max_iter - Maximum number of iterations allowed for the solver to converge

***RandomParameterSampling*** defines random sampling over a hyperparameter search space. This sampling algorithm selects parameter values from a series of discrete values or a distribution over a continuous range. This has an advantage over the GridSearch method, which executes all combinations of parameters and takes a long time to execute.

For the "*Inverse of regularization strength*" parameter (`--C`), I chose a uniform distribution with min=0.0001 and max=1.0. For the maximum number of iterations (`--max_iter`), I chose from a set of discrete values (5, 25, 50, 100, 200, 500, 1000).

The ***BanditPolicy*** class defines an early termination policy based on slack criteria, together with a frequency and delay interval for evaluation. It ensures that a run whose parameters perform poorly is terminated early instead of being left to run to completion.

The training takes place on the remote AmlCompute cluster created above. Accuracy was chosen as the primary metric, with the goal of maximizing it. To keep the training short, max_total_runs was set to 50 and max_duration_minutes to 20.

***max_concurrent_runs = 4***: the maximum number of child runs that can run in parallel. It is recommended to create a dedicated cluster per experiment and to match max_concurrent_runs to the number of nodes in the cluster, so that all nodes of the cluster are used at the same time. The cluster has at most 4 nodes, so I set the value to 4.

In [None]:
# Early termination policy (not required when Bayesian sampling is used)
early_termination_policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Random search space: a continuous range for --C and a discrete set of
# values for --max_iter
param_sampling = RandomParameterSampling(
    {
        "--C": uniform(0.0001, 1.0),
        "--max_iter": choice(5, 25, 50, 100, 200, 500, 1000),
    }
)

# Stage train.py in the folder used as the estimator's source directory.
# makedirs(exist_ok=True) checks the actual path and is safe to re-run.
script_folder = "./training"
os.makedirs(script_folder, exist_ok=True)
shutil.copy('./train.py', script_folder)

# Estimator that runs train.py from the staged folder on the compute cluster
estimator = SKLearn(source_directory=script_folder,
                    compute_target=amlcompute_target,
                    entry_script="train.py")

# HyperDrive configuration: maximize "Accuracy" over at most 50 runs or
# 20 minutes, with up to 4 runs executing concurrently
hyperdrive_run_config = HyperDriveConfig(estimator=estimator,
                                         hyperparameter_sampling=param_sampling,
                                         primary_metric_name="Accuracy",
                                         primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                         max_total_runs=50,
                                         max_duration_minutes=20,
                                         max_concurrent_runs=4,
                                         policy=early_termination_policy)

In [None]:
# Submit the HyperDrive configuration to the experiment; the estimator inside
# it targets the configured remote compute cluster.
hyperdrive_run = experiment.submit(config=hyperdrive_run_config)

In [None]:
# Wait for the HyperDrive run to finish, showing its output while waiting
hyperdrive_run.wait_for_completion(show_output=True)

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

Use the `RunDetails` widget to show the different experiments.

In [None]:
# Show the run-details widget for the HyperDrive run
RunDetails(hyperdrive_run).show()

# Print each child run below a separator line
for child_run in hyperdrive_run.get_children():
    print('-----------------------------------', child_run, sep='\n')

## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [None]:
# Stop here unless the HyperDrive run reports the "Completed" status
run_status = hyperdrive_run.get_status()
assert run_status == "Completed"

In [None]:
# Pick the child run with the best primary metric, then fetch its metrics and
# the arguments recorded in its run definition
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
best_run_details = best_run.get_details()
parameter_values = best_run_details['runDefinition']['arguments']

# Summary of the best run
print("Best Experiment Run:")
print("-------------------------------------------")
print(f"Best Run Id:  {best_run.id}")
print(f"Accuracy: {best_run_metrics['Accuracy']}")
print(f"Regularization Strength: {best_run_metrics['Regularization Strength:']}")
print(f"Max iterations: {best_run_metrics['Max iterations:']}")

best_run

In [None]:
# Full details record of the best run
best_run.get_details()

In [None]:
# All metrics of the best run
best_run.get_metrics()

In [None]:
# Print every metric name of the best run next to the value fetched earlier
for metric_name in best_run.get_metrics():
    print(metric_name, best_run_metrics[metric_name])

In [None]:
# Only the metric named 'Accuracy' of the best run
best_run.get_metrics(name='Accuracy')

In [None]:
# Properties of the best run
best_run.get_properties()

### Test the best model

In [None]:
# Split into x and y tests
y_test = test_df['DEATH_EVENT']
x_test = test_df.drop(['DEATH_EVENT'],axis=1)

In [None]:
from sklearn.metrics import confusion_matrix

# `best_model` is not created by any earlier cell, so download the pickled
# model from the best run and load it here.
# NOTE(review): assumes train.py saves the fitted model as outputs/model.pkl
# - confirm against the training script.
local_model_path = 'best_hyperdrive_model.pkl'
best_run.download_file('outputs/model.pkl', local_model_path)
best_model = joblib.load(local_model_path)

# Test the best model and create a confusion matrix
ypred = best_model.predict(x_test)
cmatrix = confusion_matrix(y_test, ypred)

# Visualize the confusion matrix as a colour-graded table
pd.DataFrame(cmatrix).style.background_gradient(cmap='Blues', low=0, high=0.9)

### Save the best model

In [None]:
# Names of the files stored with the best run
best_run.get_file_names()

In [None]:
# Register the best run's output under the model name 'best_run_hyperdrive'.
# NOTE(review): model_path is a path within the run's stored files; confirm
# that './outputs/' resolves as intended.
best_run.register_model(model_name='best_run_hyperdrive', model_path='./outputs/')

In [None]:
# NOTE(review): `constants` is not used below; the import is kept so the
# notebook namespace stays unchanged.
from azureml.automl.core.shared import constants

# Create the inference folder (no error if it already exists)
inference_folder = 'inference'
os.makedirs(inference_folder, exist_ok=True)

# Save the scoring script, the model and the conda env file of the best run
# in the inference folder.
# NOTE(review): scoring_file_v_1_0_0.py and conda_env_v_1_0_0.yml look like
# AutoML artifact names - confirm that the training run writes these files.
best_run.download_file('outputs/scoring_file_v_1_0_0.py', inference_folder + '/best_hyperdrive_score.py')
best_run.download_file('outputs/model.pkl', inference_folder + '/best_hyperdrive_model.pkl')

# The '/' separator is required so the file lands inside the inference folder
best_run.download_file('outputs/conda_env_v_1_0_0.yml', inference_folder + '/hyperdrive_conda_env.yml')

### Save the environment

In [None]:
# get the list of environments
Environment.list(workspace=ws).keys()

In [None]:
# save the environment
my_env = Environment.get(workspace=ws, name="AzureML-AutoML")
my_env.save_to_directory('env', overwrite=True)

my_env

## Model Deployment

Remember you have to deploy only one of the two models you trained but you still need to register both the models. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [None]:
# Register the model
import sklearn  # needed for sklearn.__version__; the top-level imports only bind submodules
from azureml.core.resource_configuration import ResourceConfiguration

# Use the run's 'model_name' property when it has one; otherwise fall back to
# a fixed name instead of failing with a KeyError.
model_name = best_run.properties.get('model_name', 'best_run_hyperdrive')
# Model file downloaded from the best run into the inference folder
local_file = inference_folder + '/best_hyperdrive_model.pkl'

run_id = best_run.id
experiment_name = best_run.experiment.name

model = Model.register(workspace = ws,
                       model_name = model_name,                        # Name of the registered model in your workspace.
                       model_path = local_file,                        # Local file to upload and register as a model.
                       model_framework = Model.Framework.SCIKITLEARN,  # Framework used to create the model.
                       model_framework_version = sklearn.__version__,  # Version of scikit-learn used to create the model.
                       description = 'Best HyperDrive model to predict mortality caused by heart failure.',
                       tags={'area': 'heart-failure', 'type': 'classification'})

print('Model name:', model.name)
print('Model id:', model.id)
print('Model version:', model.version)

In [None]:
# create inference configuration
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig

# Conda specification and scoring script of the HyperDrive model, both
# expected inside the inference folder
conda_env_file = inference_folder + '/hyperdrive_conda_env.yml'
score_script = inference_folder + '/best_hyperdrive_score.py'

env = Environment.from_conda_specification(name="my_env", file_path=conda_env_file)
# NOTE(review): see `my_env` saved earlier in the notebook - is it the same environment?

inference_config = InferenceConfig(entry_script=score_script, environment=env)

# display the environment file
with open(conda_env_file, 'r') as file:
    env_file = file.read()
    print(env_file)

In [None]:
# Model Deployment
from azureml.core.webservice import AciWebservice

# Deployment configuration for the Azure Container Instance: 1 CPU core and
# 1 GB of memory, with authentication and Application Insights switched on
service_tags = {'area': "heart-failure", 'type': "classification"}
aci_deployment_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    tags=service_tags,
    description="Predict heart failure mortality using classification model",
    auth_enabled=True,
    enable_app_insights=True,
)

# Deploy the registered model as an ACI web service, overwriting a service
# that already uses this name
aci_service = Model.deploy(
    workspace=ws,
    name="aci-heart-failure-deploy",
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_deployment_config,
    overwrite=True,
)

# Wait for the deployment to finish, showing its output
aci_service.wait_for_deployment(show_output=True)

In [None]:
# get the active api endpoint for scoring
print(f"Service State: {aci_service.state}\n")
print(f"Scoring URI:   {aci_service.scoring_uri}\n")
print(f"Swagger URI:   {aci_service.swagger_uri}\n")

### Consuming the model
TODO: In the cell below, send a request to the web service you deployed to test it.

In [None]:
# Send a request to the web service
import json
import requests

# Draw 4 random test records to score, so four predictions come back;
# the true labels are split off for the comparison afterwards
test_sample = test_df.sample(n=4)
labels = test_sample.pop('DEATH_EVENT')

# Convert to JSON string and keep a copy of the payload on disk
input_data = json.dumps({"data": test_sample.to_dict(orient='records')})
with open("input_data.json", 'w') as _f:
    _f.write(input_data)

print(input_data)

# The deployment configuration sets auth_enabled=True, so the service only
# accepts requests that carry one of its keys as a bearer token.
request_headers = {'Content-Type': 'application/json'}
if aci_service.auth_enabled:
    primary_key, _secondary_key = aci_service.get_keys()
    request_headers['Authorization'] = f'Bearer {primary_key}'

response = requests.post(aci_service.scoring_uri, data=input_data, headers=request_headers)

In [None]:
# Show the service's predictions next to the true labels of the sample
predictions = response.json()
true_labels = labels.tolist()
print(f"Predictions from Service: {predictions}\n")
print(f"Data Labels: {true_labels}")

### Print the logs of the web service and delete the service
TODO: In the cell below, print the logs of the web service and delete the service

In [None]:
# Print the log of the webservice
print(aci_service.get_logs())

In [None]:
# Delete the webservice, model, and shut down the compute cluster
aci_service.delete()
model.delete()
amlcompute_target.delete()

**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shutdown all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.

