In [None]:
from azureml.core import Workspace

# Connect to the Azure ML workspace; from_config() is called with no
# arguments, so its default configuration lookup is used.
ws = Workspace.from_config()
# Bare expression so the workspace is shown as the cell's output.
ws

In [None]:
import git
from pathlib import Path

# repository root: search upward from the current directory for the git repo
prefix = Path(git.Repo(".", search_parent_directories=True).working_tree_dir)

# training script: file name and the directory that contains it
script_name = "train.py"
script_dir = prefix / "code" / "lightgbm" / "iris"

# pip requirements file used to build the run environment
environment_file = prefix / "environments" / "lightgbm.txt"

# azure ml settings
compute_target = "local"
experiment_name = "hpo-lightgbm-optuna-coiled-test"
environment_name = "hpo-lightgbm"

In [None]:
from azureml.core import Experiment, ScriptRunConfig, Environment

def objective(trial):
    """Optuna objective: run one Azure ML training job and return its loss.

    Samples LightGBM hyperparameters from ``trial``, forwards them to the
    training script as command-line arguments, submits the run to the
    ``experiment_name`` experiment and blocks until the run has finished.

    Reads the notebook-level names ``ws``, ``environment_name``,
    ``environment_file``, ``script_dir``, ``script_name``,
    ``compute_target`` and ``experiment_name``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The ``"loss"`` metric logged by the run when its final status is
        ``"Completed"``; ``None`` otherwise.
    """
    ds = ws.datasets["iris"]
    env = Environment.from_pip_requirements(environment_name, environment_file)

    # Flag/value pairs passed to the training script. The Optuna parameter
    # names intentionally match the command-line flags.
    arguments = [
        "--data-dir",
        ds.as_mount(),
        "--num-boost-round",
        trial.suggest_int("--num-boost-round", 1, 100),
        "--boosting",
        trial.suggest_categorical("--boosting", ["gbdt", "rf", "dart", "goss"]),
        "--num-iterations",
        trial.suggest_int("--num-iterations", 1, 100),
        "--num-leaves",
        trial.suggest_int("--num-leaves", 2, 64),
        "--num-threads",
        trial.suggest_categorical("--num-threads", [1, 2, 4]),
        "--learning-rate",
        # NOTE: 10e-5 == 1e-4, so the sampled range is [1e-4, 0.1].
        # NOTE(review): newer Optuna releases prefer
        # suggest_float(..., log=True) — confirm the installed version
        # before switching.
        trial.suggest_loguniform("--learning-rate", 10e-5, .1),
        "--seed",
        trial.suggest_categorical("--seed", [1, 3, 5, 7, 11, 13, 42]),
    ]

    src = ScriptRunConfig(
        source_directory=script_dir,
        script=script_name,
        arguments=arguments,
        environment=env,
        compute_target=compute_target,
    )

    run = Experiment(ws, experiment_name).submit(src)
    # raise_on_error=False: a failed run must not raise here, otherwise the
    # status check below is never reached and the exception propagates out
    # of the objective.
    run.wait_for_completion(raise_on_error=False)

    if run.get_status() != "Completed":
        # No usable loss for this trial.
        # NOTE(review): Optuna is expected to record a None result as a
        # failed trial — confirm for the installed version.
        return None

    # Only the loss is needed; other logged metrics are not read so that a
    # missing optional metric cannot break the trial.
    return run.get_metrics()["loss"]

In [None]:
# One-time setup, kept commented out: export this kernel's installed packages to coiled.txt.
#!pip freeze > coiled.txt

In [None]:
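# One-time setup, kept commented out: build the "optuna" software environment from coiled.txt.
# If re-enabled, `import coiled` must run before this line.
#coiled.create_software_environment(name="optuna", pip="coiled.txt")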

In [None]:
import coiled

# Create a Coiled cluster named "dkdc" with 25 workers, using the "optuna"
# software environment.
cluster = coiled.Cluster(name="dkdc", n_workers=25, software="optuna")
# Bare expression so the cluster is shown as the cell's output.
cluster

In [None]:
import joblib 
import optuna
import dask_optuna

from dask.distributed import Client

# Connect a Dask client to the Coiled cluster created in the previous cell.
c = Client(cluster)
print(c)

# Dask-backed Optuna storage. No client is passed explicitly, so it
# presumably attaches to the client created just above — keep this after
# Client(cluster). TODO confirm against the dask-optuna docs.
storage = dask_optuna.DaskStorage()
# Minimization study: `objective` returns the run's loss.
study = optuna.create_study(direction="minimize", study_name="dkdc", storage=storage)
# Run 100 trials with joblib's "dask" backend active so that Optuna's
# parallel jobs (n_jobs=-1) go through Dask.
with joblib.parallel_backend("dask"):
    study.optimize(objective, n_trials=100, n_jobs=-1)

In [None]:
# Show the hyperparameters of the study's best trial.
study.best_params

In [None]:
# Show how many trials the study has recorded.
len(study.trials)

In [None]:
# Shut down the Coiled cluster now that the study is finished.
cluster.close()