In [1]:
from azureml.core import Workspace

# Connect to the Azure ML workspace. from_config() presumably reads a local
# workspace config file -- confirm one is present before running this cell.
ws = Workspace.from_config()
# Bare expression: the notebook shows the workspace repr as the cell output.
ws

Workspace.create(name='AzureML', subscription_id='6560575d-fa06-4e7d-95fb-f962e74efd7a', resource_group='cody-eastus-rg')

In [2]:
import git
from pathlib import Path

# Repository root: search upward from the current directory for the git repo.
prefix = Path(git.Repo(".", search_parent_directories=True).working_tree_dir)

# Location of the training script inside the repository.
script_dir = prefix / "code" / "lightgbm" / "iris"
script_name = "train.py"

# Pip requirements file used to build the run environment.
environment_file = prefix / "environments" / "lightgbm.txt"

# Azure ML settings: environment/experiment names and where the runs execute.
environment_name = "hpo-lightgbm"
experiment_name = "hpo-lightgbm-optuna-minimize-train-time"
compute_target = "local"

In [3]:
from azureml.core import Experiment, ScriptRunConfig, Environment


def objective(trial):
    """Submit one Azure ML training run for an Optuna trial.

    The trial samples the LightGBM command-line arguments, the script
    ``script_name`` in ``script_dir`` is submitted to ``compute_target``
    under ``experiment_name``, and this function blocks until the run ends.

    Args:
        trial: Optuna trial used to sample the hyperparameters. Each
            parameter is registered under the name of its CLI flag.

    Returns:
        The run's ``training_time`` metric when the run completes and logged
        that metric; otherwise ``None``. Optuna is expected to record a
        ``None`` result as a failed trial and keep going -- verify against
        the Optuna version in use.
    """
    ds = ws.datasets["iris"]
    env = Environment.from_pip_requirements(environment_name, environment_file)

    arguments = [
        "--data-dir",
        ds.as_mount(),
        "--num-boost-round",
        trial.suggest_int("--num-boost-round", 1, 100),
        "--boosting",
        trial.suggest_categorical(
            "--boosting", ["gbdt", "rf", "dart", "goss"]
        ),
        "--num-iterations",
        trial.suggest_int("--num-iterations", 1, 100),
        "--num-leaves",
        trial.suggest_int("--num-leaves", 2, 64),
        "--num-threads",
        trial.suggest_categorical("--num-threads", [1, 2, 4]),
        "--learning-rate",
        # Lower bound is 1e-4 (previously spelled 10e-5, which is the same value).
        trial.suggest_loguniform("--learning-rate", 1e-4, 0.1),
        "--seed",
        trial.suggest_categorical("--seed", [1, 3, 5, 7, 11, 13, 42]),
    ]

    src = ScriptRunConfig(
        source_directory=script_dir,
        script=script_name,
        arguments=arguments,
        environment=env,
        compute_target=compute_target,
    )

    run = Experiment(ws, experiment_name).submit(src)
    # Do not raise on a failed run: the status check below turns it into a
    # failed trial instead of letting one bad run abort the whole study.
    run.wait_for_completion(raise_on_error=False)
    if run.get_status() != "Completed":
        return None

    # Only training_time is optimized. Use .get so a run that did not log the
    # metric yields None (same as a failed run) rather than a KeyError.
    return run.get_metrics().get("training_time")

In [4]:
%%time

import joblib
import optuna
import dask_optuna

from dask.distributed import Client

# Start a Dask client with default settings and report where its dashboard is.
c = Client()
print(c)
print(c.dashboard_link)

# Dask-backed Optuna storage for the study that minimizes the objective value.
storage = dask_optuna.DaskStorage()
study = optuna.create_study(
    storage=storage,
    study_name="dkdc",
    direction="minimize",
)

# Run the trials through joblib's Dask backend; n_jobs=-1 leaves the degree
# of parallelism up to the backend.
with joblib.parallel_backend("dask"):
    study.optimize(objective, n_jobs=-1, n_trials=100)

<Client: 'tcp://127.0.0.1:58575' processes=4 threads=8, memory=34.36 GB>
http://127.0.0.1:8787/status
[I 2020-10-03 20:43:49,778] A new study created in memory with name: dkdc
CPU times: user 1min 34s, sys: 9.8 s, total: 1min 43s
Wall time: 18min 5s


In [5]:
study.best_params

{'--num-boost-round': 91,
 '--boosting': 'gbdt',
 '--num-iterations': 56,
 '--num-leaves': 40,
 '--num-threads': 1,
 '--learning-rate': 0.0007155791176915489,
 '--seed': 11}

In [6]:
study.best_trial

FrozenTrial(number=64, value=43.99907112121582, datetime_start=datetime.datetime(2020, 10, 3, 20, 53, 54, 793768), datetime_complete=datetime.datetime(2020, 10, 3, 20, 55, 36, 94860), params={'--num-boost-round': 91, '--boosting': 'gbdt', '--num-iterations': 56, '--num-leaves': 40, '--num-threads': 1, '--learning-rate': 0.0007155791176915489, '--seed': 11}, distributions={'--num-boost-round': IntUniformDistribution(high=100, low=1, step=1), '--boosting': CategoricalDistribution(choices=('gbdt', 'rf', 'dart', 'goss')), '--num-iterations': IntUniformDistribution(high=100, low=1, step=1), '--num-leaves': IntUniformDistribution(high=64, low=2, step=1), '--num-threads': CategoricalDistribution(choices=(1, 2, 4)), '--learning-rate': LogUniformDistribution(high=0.1, low=0.0001), '--seed': CategoricalDistribution(choices=(1, 3, 5, 7, 11, 13, 42

In [7]:
study.best_value

43.99907112121582