In [1]:

import os
import warnings
import shutil
from pathlib import Path

# Keep notebook output clean: swap the warning display hook for a no-op
def _discard_warning(*args, **kwargs):
    """Accept any showwarning-style arguments and display nothing."""
    return None


warnings.showwarning = _discard_warning

from azureml.core import Experiment, Workspace, Dataset
from azureml.core.compute import ComputeTarget

ModuleNotFoundError: No module named 'azureml.core'

In [None]:
# Connect to the workspace described by the local config file and set up the experiment
ws = Workspace.from_config()
experiment_name = 'hyperparameter-tuning-hyperdrive'
experiment = Experiment(ws, experiment_name)
# Uses whichever compute target the workspace lists first
compute_target = ComputeTarget.list(ws)[0]

train_dataset_name = 'flight-delays-train-dataset'
validation_dataset_name = 'flight-delays-validation-dataset'


def _ensure_local_csv(workspace, dataset_name):
    """Download a registered dataset to ``<dataset_name>.csv`` unless that file already exists.

    Args:
        workspace: Workspace in which the dataset is registered.
        dataset_name: Registered dataset name; also used as the stem of the local CSV file.

    Returns:
        The local CSV file name as a string.
    """
    csv_name = f'{dataset_name}.csv'
    if not Path(csv_name).exists():
        frame = Dataset.get_by_name(workspace=workspace, name=dataset_name).to_pandas_dataframe()
        # Default to_csv options are kept so the file layout is unchanged
        frame.to_csv(path_or_buf=csv_name)
    return csv_name


# Each file is checked on its own, so a missing validation CSV is downloaded
# even when the training CSV is already present locally.
train_csv = _ensure_local_csv(ws, train_dataset_name)
validation_csv = _ensure_local_csv(ws, validation_dataset_name)

In [None]:
# Stage everything the remote instance needs inside a single folder
project_folder = './hyperdrive'
os.makedirs(project_folder, exist_ok=True)

# Training script plus both dataset CSVs, copied in this order
for staged_file in (
    'train.py',
    'flight-delays-train-dataset.csv',
    'flight-delays-validation-dataset.csv',
):
    shutil.copy(staged_file, project_folder)

# Write the workspace configuration into the staged folder as well
ws.write_config(path=project_folder)

In [None]:
%%writefile conda_dependencies.yml

# Conda environment specification for the training runs.
dependencies:
- python=3.6.2
- scikit-learn
- pandas
- numpy
# Entries under "pip:" are installed with pip instead of conda.
- pip:
  - azureml-defaults

In [None]:
from azureml.core import Environment

# Build the run environment from the conda specification file
sklearn_env = Environment.from_conda_specification(
    name='sklearn-env',
    file_path='./conda_dependencies.yml',
)


In [None]:
from azureml.core import ScriptRunConfig

# Input defaults passed to train.py on the command line
default_arguments = ['--n_estimators', 100, '--learning_rate', 1.0]

# Run train.py from the staged folder on the selected compute target and environment
src = ScriptRunConfig(
    source_directory=project_folder,
    script='train.py',
    arguments=default_arguments,
    compute_target=compute_target,
    environment=sklearn_env,
)

In [None]:
from azureml.train.hyperdrive import BanditPolicy

# Bandit early-termination policy used by the HyperDrive sweep
early_termination_policy = BanditPolicy(
    slack_factor=0.1,
    evaluation_interval=1,
    delay_evaluation=5,
)


In [None]:
from azureml.train.hyperdrive import RandomParameterSampling, PrimaryMetricGoal
from azureml.train.hyperdrive import normal, choice

# Search space for the random sampler
search_space = {
    # Drawn from normal(0.2, 0.05)
    "learning_rate": normal(0.2, 0.05),
    # One of 50, 100, 150, 200
    "n_estimators": choice(range(50, 250, 50)),
}
param_sampling = RandomParameterSampling(search_space)

In [None]:
from azureml.train.hyperdrive import HyperDriveConfig

# Combine the script run, sampler, and termination policy into one sweep definition
hd_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    # NOTE(review): presumably must match the metric name logged by train.py - confirm
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    # At most 12 runs in total, at most 4 at the same time
    max_total_runs=12,
    max_concurrent_runs=4,
)

In [None]:
# Submit the sweep configuration to the experiment and keep the run handle
hyperdrive_run = experiment.submit(config=hd_config)

In [None]:
from azureml.widgets import RunDetails

# Show the run widget, then wait for the sweep to finish while streaming its output
run_widget = RunDetails(hyperdrive_run)
run_widget.show()
hyperdrive_run.wait_for_completion(show_output=True)