In [1]:
from azureml.core import Workspace

# Load the Azure ML workspace handle from the local workspace configuration
# (Workspace.from_config reads a saved config file; see azureml-core docs).
# Every later cell uses `ws` to look up datasets and submit experiments.
ws = Workspace.from_config()
# Bare last expression: display the workspace repr as the cell output.
ws

Workspace.create(name='AzureML', subscription_id='6560575d-fa06-4e7d-95fb-f962e74efd7a', resource_group='cody-eastus-rg')

In [2]:
import git
from pathlib import Path

# Resolve the repository root by walking up from the current directory,
# so the notebook works regardless of where inside the repo it is run.
prefix = Path(git.Repo(".", search_parent_directories=True).working_tree_dir)

# Location and entry point of the training script.
script_dir = prefix / "code" / "lightgbm" / "iris"
script_name = "train.py"

# Pip requirements file used to build the run environment.
environment_file = prefix / "environments" / "lightgbm.txt"

# Azure ML names: environment, experiment, and compute target.
environment_name = "hpo-lightgbm"
experiment_name = "hpo-lightgbm-hyperdrive-loss"
compute_target = "cpu-cluster"

In [3]:
from azureml.core import Experiment, ScriptRunConfig, Environment

# Dataset registered in the workspace under the name "iris".
ds = ws.datasets["iris"]

# Run environment built from the pip requirements file.
env = Environment.from_pip_requirements(environment_name, environment_file)

# The dataset is mounted on the compute and handed to the script via --data-dir.
arguments = ["--data-dir", ds.as_mount()]

# Base (single-run) configuration; the hyperparameter sweep wraps this.
src = ScriptRunConfig(
    source_directory=script_dir,
    script=script_name,
    arguments=arguments,
    environment=env,
    compute_target=compute_target,
)

# Optional smoke test: uncomment to submit one plain run before sweeping.
#run = Experiment(ws, experiment_name).submit(src)
#run

In [4]:
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling, BayesianParameterSampling
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.parameter_expressions import (
    choice,
    loguniform,
    uniform,
)

import math

# Random-search space for the sweep. Each key is a command-line flag that is
# appended to the training script's arguments with the sampled value.
#
# NOTE(review): in LightGBM, `num_boost_round` is documented as an alias of
# `num_iterations`; sampling both may be redundant or conflicting depending on
# how train.py forwards them -- confirm against the script.
# NOTE(review): LightGBM's "rf" boosting mode requires bagging to be enabled;
# verify train.py sets bagging parameters, otherwise those trials may fail.
param_sampling = RandomParameterSampling(
    {
        "--num-boost-round": choice(range(1, 100)),
        "--boosting": choice(["gbdt", "rf", "dart", "goss"]),
        "--num-iterations": choice(range(1, 100)),
        "--num-leaves": choice(range(2, 64)),
        "--num-threads": choice([1, 2, 4]),
        # HyperDrive's loguniform(a, b) returns exp(uniform(a, b)), so the
        # bounds must be natural logs of the desired range. Passing the raw
        # values (1e-4, 0.1) would sample learning rates in ~[1.0001, 1.105].
        "--learning-rate": loguniform(math.log(1e-4), math.log(0.1)),
        "--seed": choice([1, 3, 5, 7, 11, 13, 42]),
    }
)

# Sweep definition: wrap the base run config with the sampled search space and
# minimize the metric named "loss" (presumably logged by train.py -- verify).
hdc = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=param_sampling,
    primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
    primary_metric_name="loss",
    max_concurrent_runs=20,
    max_total_runs=100,
)

# Submit the sweep; `run` is the parent HyperDrive run used by later cells.
experiment = Experiment(ws, experiment_name)
run = experiment.submit(hdc)
run

Experiment,Id,Type,Status,Details Page,Docs Page
hpo-lightgbm-hyperdrive-loss,HD_085d8904-cb8c-4d74-8eca-ac59334b9b6e,hyperdrive,Running,Link to Azure Machine Learning studio,Link to Documentation


In [None]:
from azureml.widgets import RunDetails

# Render the interactive Azure ML widget for the submitted sweep (`run`).
RunDetails(run).show()

In [None]:
%%time

# Block until the whole sweep has finished, then pick the child run with the
# best primary metric and show the command-line arguments it was launched with.
run.wait_for_completion()
# NOTE(review): per the HyperDriveRun docs this may return None when no child
# run logged the primary metric; the next line would then fail -- confirm.
best_run = run.get_best_run_by_primary_metric()
best_run.get_details()["runDefinition"]["arguments"]

In [None]:
# Display the metrics logged by the best run of the sweep.
best_run.get_metrics()