In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded automatically before each cell is executed.
%load_ext autoreload
%autoreload 2
import sys

# Put the parent directory at the front of the import search path so that
# project-local packages located there can be imported from this notebook
# (presumably the `utils` package imported in the next cell — confirm).
sys.path.insert(0, "..")  # add parent folder path where lib folder is

In [2]:
from utils import config, dasker, helper

--------------------------------------------------------------------------------

  CuPy may not function correctly because multiple CuPy packages are installed
  in your environment:

    cupy, cupy-cuda115

  Follow these steps to resolve this issue:

    1. For all packages listed above, run the following command to remove all
       existing CuPy installations:

         $ pip uninstall <package_name>

      If you previously installed CuPy via conda, also run the following:

         $ conda uninstall cupy

    2. Install the appropriate CuPy package.
       Refer to the Installation Guide for detailed instructions.

         https://docs.cupy.dev/en/stable/install.html

--------------------------------------------------------------------------------



In [10]:
"""
Example using Coiled (https://coiled.io) and Dask-Optuna to run optimization trials
on a Dask cluster on AWS.
Here we use Optuna to tune hyperparameters for an XGBoost classifier.
Adapted from https://github.com/optuna/optuna/blob/master/examples/xgboost_simple.py
"""
from pprint import pprint

import dask_optuna
import joblib
import numpy as np
import optuna
import sklearn.datasets
import sklearn.metrics
import xgboost as xgb
from dask.distributed import Client

# from sklearn.model_selection import train_test_split
from dask_ml.model_selection import train_test_split

# Raise Optuna's log threshold to WARNING so that per-trial INFO messages do
# not flood the notebook output during the study.
optuna.logging.set_verbosity(optuna.logging.WARNING)

In [3]:
def objective(trial):
    """Train one XGBoost binary classifier with trial-suggested hyperparameters.

    The dataset is loaded through ``helper.get_dd_covid_dataset()``, 25% of it
    is held out for evaluation, a booster is trained on the remainder with the
    parameters sampled from ``trial``, and the hold-out accuracy is returned.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the booster type and its hyperparameters.

    Returns
    -------
    float
        Accuracy of the rounded predictions on the held-out split.
    """
    X, y = helper.get_dd_covid_dataset()
    # NOTE(review): no random_state is passed, so the split is presumably
    # different for every trial, which adds noise when comparing trials.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)

    param = {
        # `silent` is no longer a recognised XGBoost parameter (it only
        # triggers an "unused parameter" warning); `verbosity: 0` is the
        # supported way to mute training output.
        "verbosity": 0,
        "objective": "binary:logistic",
        "booster": trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"]),
        # L2 / L1 regularisation weights, sampled on a log scale.
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
    }

    # Tree-specific parameters are only sampled for the tree-based boosters.
    if param["booster"] in ("gbtree", "dart"):
        param["max_depth"] = trial.suggest_int("max_depth", 1, 9)
        param["eta"] = trial.suggest_float("eta", 1e-8, 1.0, log=True)
        param["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
        param["grow_policy"] = trial.suggest_categorical(
            "grow_policy", ["depthwise", "lossguide"]
        )
    # DART additionally exposes its dropout configuration.
    if param["booster"] == "dart":
        param["sample_type"] = trial.suggest_categorical(
            "sample_type", ["uniform", "weighted"]
        )
        param["normalize_type"] = trial.suggest_categorical(
            "normalize_type", ["tree", "forest"]
        )
        param["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
        param["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)

    bst = xgb.train(param, dtrain)
    preds = bst.predict(dtest)
    # `binary:logistic` yields probabilities; round them to 0/1 class labels.
    pred_labels = np.rint(preds)
    return sklearn.metrics.accuracy_score(y_test, pred_labels)

In [4]:
# Run the hyperparameter search while the client obtained from `dasker` is
# open; the client is released when the `with` block exits.
with dasker.get_global_client() as client:
    print(f"Dask dashboard is available at {client.dashboard_link}")
    # client.wait_for_workers(5)

    # Keep the study state in Dask-backed storage so trials executed through
    # the Dask joblib backend share one study.
    storage = dask_optuna.DaskStorage()
    study = optuna.create_study(
        storage=storage,
        direction="maximize",  # `objective` returns an accuracy
    )

    # Dispatch the trials through joblib's Dask backend, using all available
    # parallelism (n_jobs=-1).
    with joblib.parallel_backend("dask"):
        study.optimize(
            objective,
            n_trials=100,
            n_jobs=-1,
        )

    print("Best params:")
    pprint(study.best_params)

connected to cluster dev.0d572b45216f4583ace62eae2c1a0f32
Dask dashboard is available at https://asd-dev.link/gateway/clusters/dev.0d572b45216f4583ace62eae2c1a0f32/status



+-------------+-----------+-----------+-----------+
| Package     | client    | scheduler | workers   |
+-------------+-----------+-----------+-----------+
| dask        | 2022.10.2 | 2022.04.2 | 2022.04.2 |
| distributed | 2022.10.2 | 2022.4.2  | 2022.4.2  |
+-------------+-----------+-----------+-----------+


NameError: name 'dask_optuna' is not defined