<i>Copyright (c) Microsoft Corporation. All rights reserved.</i>

<i>Licensed under the MIT License.</i>

# Testing different Hyperparameters and Benchmarking

In this notebook, we'll cover how to test different hyperparameters for a particular dataset and how to benchmark different parameters across a group of datasets.

## Table of Contents

* [Testing hyperparameters](#hyperparam)
  * [Using Python](#python)
  * [Using the CLI](#cli)
  * [Visualizing the results](#visualize)

---

## Testing hyperparameters  <a name="hyperparam"></a>

Let's say we want to learn more about __how different learning rates and different image sizes affect our model's accuracy when restricted to 10 epochs__, and we want to build an experiment to test out these hyperparameters. We also want to try these parameters out on two different variations of the dataset - one where the images are kept raw (maybe there is a watermark on the image) and one where the images have been altered (the same dataset where there was some attempt to remove the watermark).

In this notebook, we'll walk through how we use the Parameter Sweeper module with the following:

- use python to perform this experiment
- use the CLI to perform this experiment
- evaluate the results using Pandas

Import the libraries used in this notebook and check which versions are installed.

In [None]:
import fastai
from fastai.vision import *

import os
import sys
sys.path.append("../../")

from utils_cv.classification.data import Urls
from utils_cv.common.data import unzip_url
from utils_cv.classification.parameter_sweeper import *
from utils_cv.classification.model import TrainMetricsRecorder

import azureml.core
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.estimator import Estimator
import azureml.data
from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, uniform, PrimaryMetricGoal, choice
import azureml.widgets as widgets

# Report the versions of the two key libraries so results can be reproduced later.
print("Fastai version:", fastai.__version__)
print("SDK version:", azureml.core.VERSION)

Ensure edits to libraries are loaded and plotting is shown in the notebook.

In [None]:
# Load the autoreload extension and enable mode 2 so that edits to imported
# libraries are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Show matplotlib plots inside the notebook output.
%matplotlib inline

In [None]:
# Obtain the Azure ML workspace handle and print its name and region.
ws = Workspace.setup()
ws_details = ws.get_details()
workspace_summary = 'Name:\t\t{name}\nLocation:\t{location}'.format(
    name=ws_details['name'],
    location=ws_details['location'],
)
print(workspace_summary)

In [None]:
# Name of the Azure ML compute cluster to reuse, or to create if it does not exist yet.
cluster_name = "gpu-cluster-nc24"
# Remote compute (cluster) configuration. To reduce cost, pick a smaller VM size
# and/or a lower MAX_NODES.
VM_SIZE = 'STANDARD_NC24'
VM_PRIORITY = 'lowpriority'

# Cluster nodes
MIN_NODES = 0
MAX_NODES = 4

try:
    # Reuse the cluster when one with this name is already attached to the workspace.
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target.')
except ComputeTargetException:
    print('Creating a new compute target...')
    # Pass vm_priority explicitly: previously VM_PRIORITY was defined but never
    # forwarded, so the requested priority had no effect on the new cluster.
    compute_config = AmlCompute.provisioning_configuration(vm_size=VM_SIZE,
                                                           vm_priority=VM_PRIORITY,
                                                           min_nodes=MIN_NODES,
                                                           max_nodes=MAX_NODES)

    # create the cluster
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

    # Block until provisioning has finished so later cells can submit runs to it.
    compute_target.wait_for_completion(show_output=True)

# use get_status() to get a detailed status for the current cluster.
print(compute_target.get_status().serialize())

In [None]:
# Fetch and unzip the fridge objects dataset; DATA is the path returned by unzip_url.
DATA = unzip_url(Urls.fridge_objects_path, exist_ok=True)
REPS = 3

# Note: the *parent* directory of DATA is what gets uploaded, so every file that
# sits next to the dataset folder is copied to the datastore under 'data/' as well.
upload_root = os.path.dirname(DATA)

ds = ws.get_default_datastore()
ds.upload(src_dir=upload_root, target_path='data', overwrite=True, show_progress=True)

In [None]:
# Directory (inside the current working directory) that holds the training
# script; it is created on first use and left untouched if it already exists.
script_folder = os.path.abspath("hyperparameter")
print(script_folder)
os.makedirs(script_folder, exist_ok=True)

In [None]:
%%writefile $script_folder/train.py

import sys
import os
import numpy as np


#sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd()))))
from azureml.core import Run
import fastai
from fastai.vision import *
from fastai.vision.data import *

run = Run.get_context()

data_store_path = str(os.environ['AZUREML_DATAREFERENCE_workspaceblobstore'])
path = data_store_path + '/data/fridgeObjects'

IM_SIZES = [299, 499]
ARCHITECTURE  = models.resnet50
LEARNING_RATES = [1e-3, 1e-4, 1e-5]
EPOCHS = [10]

data = (ImageList.from_folder(path)
        .split_by_rand_pct(valid_pct=0.2, seed=10)
        .label_from_folder() 
        .transform(size=299) 
        .databunch(bs=16) 
        .normalize(imagenet_stats))


learn = cnn_learner(
    data,
    ARCHITECTURE,
    metrics=[accuracy]
    #callback_fns=[partial(TrainMetricsRecorder, show_graph=True)]
)

learn.unfreeze()

learn.fit(EPOCHS[0], LEARNING_RATES[0])

training_losses = [x.numpy().ravel()[0] for x in learn.recorder.losses]
accuracy = [x[0].numpy().ravel()[0] for x in learn.recorder.metrics][-1]

#run.log_list('training_loss', training_losses)
#run.log_list('validation_loss', learn.recorder.val_losses)
#run.log_list('error_rate', error_rate)
#run.log_list('learning_rate', learn.recorder.lrs)
run.log('accuracy', float(accuracy))

In [None]:
# Candidate values for each hyperparameter in the sweep.
IM_SIZES = [299, 499]
LEARNING_RATES = [1e-3, 1e-4, 1e-5]

# Hyperparameter search space: one learning rate and one image size per trial.
param_sampling = RandomParameterSampling({
    'learning_rate': choice(LEARNING_RATES),
    'im_sizes': choice(IM_SIZES),
})

# Metric that HyperDrive optimises and the direction of the optimisation.
primary_metric_name = 'accuracy'
primary_metric_goal = PrimaryMetricGoal.MAXIMIZE

# Run budget for the sweep.
max_total_runs = 50
max_concurrent_runs = 4

early_termination_policy = BanditPolicy(
    slack_factor=0.15,
    evaluation_interval=1,
    delay_evaluation=20,
)

In [None]:
# Arguments passed to train.py; the datastore is mounted on the compute node.
script_params = {
    '--data-folder': ds.as_mount(),
}

# use_gpu=True: the compute target is a GPU cluster, so request a GPU-enabled
# environment instead of the default one.
# fastai is pinned below 2.0 because train.py uses the fastai v1 API
# (ImageList, cnn_learner from fastai.vision), which fastai 2.x does not provide.
est = Estimator(source_directory=script_folder,
                script_params=script_params,
                compute_target=compute_target,
                entry_script='train.py',
                use_gpu=True,
                pip_packages=['fastai>=1.0,<2.0'])

In [None]:
# Experiment under which all runs of this notebook are grouped in the workspace.
experiment_name = 'hyperparameter-tuning'
exp = Experiment(ws, experiment_name)

In [None]:
# Combine the estimator, search space, early-termination policy and run budget
# into a single HyperDrive configuration.
hyperdrive_settings = {
    'estimator': est,
    'hyperparameter_sampling': param_sampling,
    'policy': early_termination_policy,
    'primary_metric_name': primary_metric_name,
    'primary_metric_goal': primary_metric_goal,
    'max_total_runs': max_total_runs,
    'max_concurrent_runs': max_concurrent_runs,
}
hyperdrive_run_config = HyperDriveConfig(**hyperdrive_settings)

In [None]:
# Submit the sweep, then show the run-details widget to follow its progress.
hyperdrive_run = exp.submit(config=hyperdrive_run_config)
run_details_widget = widgets.RunDetails(hyperdrive_run)
run_details_widget.show()

In [None]:
# Wait for the sweep to finish. Querying the best run straight after submission
# (e.g. on "Run All") can return None because no child run has reported yet.
hyperdrive_run.wait_for_completion(show_output=False)

# Get best run and its metrics
best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    raise RuntimeError(
        "HyperDrive did not return a best run; check that the child runs "
        "completed and logged the primary metric."
    )
best_run_metrics = best_run.get_metrics()

# The run arguments are a flat [flag, value, flag, value, ...] list; pair them
# up into a {flag: value} dictionary.
parameter_values = best_run.get_details()['runDefinition']['arguments']
best_parameters = dict(zip(parameter_values[::2], parameter_values[1::2]))

In [None]:
# Summarise the winning trial: its id, the run object, its arguments and accuracy.
print("* Best Run Id: {}".format(best_run.id))
print(best_run)
print("\n* Best hyperparameters:", best_parameters, sep="\n")
print("Accuracy = {}".format(best_run_metrics['accuracy']))


In [None]:
# Re-use the best trial's arguments for the final training run. The data folder
# entry is replaced with a fresh mount reference to the datastore.
model_parameters = best_parameters.copy()
model_parameters['--data-folder'] = ds.as_mount()

# use_gpu=True: the compute target is a GPU cluster, so request a GPU-enabled
# environment instead of the default one.
# fastai is pinned below 2.0 because train.py uses the fastai v1 API
# (ImageList, cnn_learner from fastai.vision), which fastai 2.x does not provide.
est = Estimator(source_directory=script_folder,
                script_params=model_parameters,
                compute_target=compute_target,
                entry_script='train.py',
                use_gpu=True,
                pip_packages=['fastai>=1.0,<2.0'])

model_run = exp.submit(est)
model_run