# Getting workspace

In [None]:
from azureml.core import Workspace,Dataset,Environment,Experiment,ComputeTarget,ScriptRunConfig
from azureml.train.hyperdrive import BayesianParameterSampling, HyperDriveConfig,PrimaryMetricGoal
from azureml.train.hyperdrive import choice

# Connect to the Azure ML workspace via Workspace.from_config(), then echo its
# identifying details (one per line) as a quick sanity check.
ws = Workspace.from_config()
for workspace_detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_detail)

# Getting predefined dataset, compute and environment

In [None]:
# Look up, by name, the pre-registered assets the rest of the notebook relies on.

# Dataset that is handed to the training script
training_data = Dataset.get_by_name(workspace=ws, name="Diabetes_Processed")

# Compute target the runs are submitted to
compute_instance = ComputeTarget(ws, "EXTNAN1")

# Environment the training script executes in
environment = Environment.get(workspace=ws, name="Diabetes-Model-Environment")

# Defining run config
The `ScriptRunConfig` class is reused for hyperparameter tuning: it is wrapped in a `HyperDriveConfig`. <br>
The arguments defined here are the general ones used by the script, such as the input data and the target column. <br>
The script `train_hyper.py` also takes the tuning parameters as input arguments; these are required for hyperparameter tuning and are supplied by a ParameterSampling class, as can be seen below.

In [None]:
# Fixed command-line arguments for the training script: the named dataset input
# and the target column. The tuned hyperparameters are not listed here.
script_arguments = [
    "--input-data", training_data.as_named_input("input_data"),
    "--target_column", "Binary_Target",
]

# Run configuration: which script to run, on which compute, in which environment.
src = ScriptRunConfig(
    source_directory="./scripts",
    script="./train_hyper.py",
    arguments=script_arguments,
    compute_target=compute_instance,
    environment=environment,
)

# HyperDriveConfig and ParameterSampling
Setting search space, defining sampling methods and termination policies

Docs: <br>
[Code Docs](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.hyperdrive?view=azure-ml-py#classes) <br>
[How to](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-tune-hyperparameters) <br>
[Defining Search Space](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-tune-hyperparameters#define-the-search-space) <br>
[Parameter Sampling Methods](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-tune-hyperparameters#sampling-the-hyperparameter-space)

In [None]:
# Discrete search space: each hyperparameter is drawn from an explicit list of
# candidate values via `choice`.
search_space = {
    "learning_rate": choice(0.1, 0.09, 0.08, 0.07, 0.06),
    "n_estimators": choice(25, 50, 75, 100, 125, 150),
    "max_depth": choice(3, 4, 5),
}

# Bayesian sampling over the search space defined above
param_sampling = BayesianParameterSampling(search_space)

#### Defining the HyperDriveConfig and submitting run
NB: </br>
_primary_metric_name: The name of the primary metric needs to exactly match the name of the metric logged by the training script_

In [None]:
# HyperDrive job definition: combines the script run config with the parameter
# sampler and names the metric to maximise. No early-termination policy is set.
# NOTE(review): the Azure ML docs state that Bayesian sampling does not support
# early-termination policies - confirm before adding one.
# NOTE(review): the docs also recommend max_total_runs >= 20 x the number of
# tuned hyperparameters for Bayesian sampling - confirm that 10 is intentional.
hd_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=param_sampling,
    primary_metric_name="F1_score",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_concurrent_runs=4,
    max_total_runs=10,
)

In [None]:
# Create the experiment handle in the workspace and submit the tuning job to it.
experiment = Experiment(workspace=ws, name="HyperparameterTuning_Diabetes")
hyperdrive_run = experiment.submit(config=hd_config)

In [None]:
from azureml.widgets import RunDetails

# Show the run-monitoring widget for the submitted tuning job.
run_monitor = RunDetails(hyperdrive_run)
run_monitor.show()

# Getting the best model run and metrics

In [None]:
# The monitoring widget does not block, so under "Restart & Run All" this cell
# would otherwise execute while the sweep is still in progress. Wait for the
# HyperDrive run to finish before asking for its best child run.
hyperdrive_run.wait_for_completion(show_output=False)

best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # No child run reported the primary metric; fail with a clear message
    # rather than an AttributeError on the calls below.
    raise RuntimeError(
        "No child run logged the primary metric 'F1_score'; "
        "check that the training script logs it under exactly that name."
    )

# Metrics logged by the best child run, and the arguments it was launched with
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['arguments']

print('Best Run Id: ', best_run.id)
print('\n F1 Score:', best_run_metrics['F1_score'])
print('\n learning rate:',best_run_metrics['Learning Rate'])
print('\n n_estimators:',best_run_metrics['n_estimators'])
print('\n max depth:',best_run_metrics['Max Depth'])
print('\n Parameters:', parameter_values)

# Downloading files from best run

In [None]:
#Downloading model object from best run
print(best_run.get_file_names())
best_run.download_files(prefix='./outputs/model/',output_directory='./best-run-model')

# Registering best model 

In [None]:
# Tag the registered model with the hyperparameter values logged by the best run.
model_tags = {
    'Learning Rate': best_run_metrics['Learning Rate'],
    'N_estimators': best_run_metrics['n_estimators'],
    'Max Depth': best_run_metrics['Max Depth'],
}

# Register the model file produced by the best run under a fixed model name.
best_run.register_model(
    model_name="HyperParamModel",
    model_path="outputs/model/model.joblib",
    description="A classification model",
    tags=model_tags,
)