In [1]:
import sys; sys.path.insert(0, '../..') # add parent folder path where lib folder is

In [2]:
%load_ext autoreload
%autoreload 2

from utils import helper, config, rayer, kaggle_dataset_helper

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import xgboost as xgb
from sklearn.ensemble import BaggingClassifier, BaggingRegressor

from sklearn.metrics import r2_score, mean_squared_error
from sklearn.metrics import f1_score, roc_auc_score, precision_score, recall_score
from ml.models import common
import ray
import time
import pandas as pd

In [3]:

from ray import tune, air
# from ray.tune.search.optuna import OptunaSearch
from ray.tune.search.bayesopt import BayesOptSearch


In [4]:
rayer.get_global_cluster()

2022-12-02 15:29:20,024	INFO packaging.py:527 -- Creating a file package for local directory '/mnt/c/Users/rwmas/GitHub/xai/python-asd/xai/'.
2022-12-02 15:29:55,941	INFO packaging.py:354 -- Pushing file package 'gcs://_ray_pkg_4abdf7a932e7a6c9.zip' (30.53MiB) to Ray cluster...
2022-12-02 15:29:57,458	INFO packaging.py:367 -- Successfully pushed file package 'gcs://_ray_pkg_4abdf7a932e7a6c9.zip'.


In [5]:
ds_train, ds_test = kaggle_dataset_helper.get_transaction_predictions_dataset()
ds_train = common.label_encode(ds_train)
ds_test = common.label_encode(ds_test)

ds_train = ds_train.fillna(-1)
ds_test = ds_test.fillna(-1)

df_X = ds_train.loc[:, ds_train.columns != 'target']
df_y = ds_train['target']

X_train, X_test, y_train, y_test = train_test_split(df_X, df_y, test_size=0.33, random_state=config.rand_state)


ss = StandardScaler()
X_train_scalar = pd.DataFrame(ss.fit_transform(X_train), columns = X_train.columns)
X_test_scalar = pd.DataFrame(ss.fit_transform(X_test), columns = X_test.columns)




In [44]:
X, y = helper.get_covid_dataset()
X = X.drop(['location'], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

ss = StandardScaler()
X_train_scalar = pd.DataFrame(ss.fit_transform(X_train), columns = X_train.columns)
X_test_scalar = pd.DataFrame(ss.fit_transform(X_test), columns = X_test.columns)

In [45]:
# y_train = y_train.values.reshape(y_train.shape[0], -1)
# y_test = y_test.values.reshape(y_test.shape[0], -1)

In [7]:
X_train_id = ray.put(X_train_scalar)
y_train_id = ray.put(y_train)
X_test_id = ray.put(X_test_scalar)
y_test_id = ray.put(y_test)

In [8]:
def __objective__(params, X_train_ref, X_test_ref, y_train_ref, y_test_ref, pred_class):

    # criterion = “gini” [“gini”, “entropy”, “log_loss”]

    X_train = ray.get(X_train_ref)
    X_test = ray.get(X_test_ref)
    y_train = ray.get(y_train_ref)
    y_test = ray.get(y_test_ref)

    # print(X_train.head())
    n_estimators = int(params['n_estimators'])

    if pred_class == 'regression':
        # cv = KFold(n_splits=self.cv_splits, shuffle=True, random_state=config.rand_state)
        score_func = mean_squared_error
        model = BaggingRegressor(n_estimators=n_estimators)

    else:
        # cv = StratifiedKFold(n_splits=self.cv_splits, shuffle=True, random_state=config.rand_state)
        score_func = f1_score
        model =  BaggingClassifier(n_estimators=n_estimators)


    model.fit(X_train, y_train.values.ravel())

    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)

    err_test = score_func(y_test, pred_test)
    err_train = score_func(y_train, pred_train)
        
    #metirc_score_train, metirc_score_test, weighted_score = self.__get_model_score__(pred_train, pred_test, y_train, y_test, self.score_func)
    weighted_score = common.get_weighted_score(err_train, err_test, pred_class, score_func)

    tune.report(weighted_score=weighted_score)

In [10]:
n_estimators = 10
n_trials = 10

baye_space = {
    # 'base_estimator': DecisionTreeRegressor
    'n_estimators': (10, n_estimators),
}

xgb_space = {
        "objective": 'count:poisson',
        "booster": (["gbtree", "dart"]),
#        "lambda": trial.suggest_float("lambda", 1e-3, 1.0, log=True),
#        "alpha": trial.suggest_float("alpha", 1e-3, 1.0, log=True),
    }


obj_func = lambda params: __objective__(params, X_train_id, X_test_id, y_train_id, y_test_id, 'regression')

# algo = OptunaSearch(space=params, metric="mean_loss", mode="min")
bayesopt = BayesOptSearch(space=baye_space, metric="weighted_score", mode="min",  random_state=config.rand_state)
# algo = ConcurrencyLimiter(algo, max_concurrent=4)

tuner = tune.Tuner(
    obj_func,
    run_config=air.RunConfig(
      name=config.create_study_name(),
      #stop={"training_iteration": 1 if args.smoke_test else 10},
    ),
    tune_config=tune.TuneConfig(
        search_alg=bayesopt,
        num_samples=n_trials,
        #checkpoint_dir=None,
    ),
#            param_space=params,
)



AssertionError: BayesOpt must be installed!. You can install BayesOpt with the command: `pip install bayesian-optimization`.

In [None]:
results = tuner.fit()
best_result = results.get_best_result()

print('############################')
print("Best hyperparameters: ", best_result)
print("Best hyperparameters: ", best_result.config)

In [63]:


xgb_space = {
        # "objective": 'count:poisson',
        #"booster": ("gbtree", "dart"),
        "lambda": (1e-3, 1.0),
#        "alpha": trial.suggest_float("alpha", 1e-3, 1.0, log=True),
    }


obj_func = lambda params: __objective_xgb__(params, X_train_id, X_test_id, y_train_id, y_test_id, 'regression')

# algo = OptunaSearch(space=params, metric="mean_loss", mode="min")
bayesopt = BayesOptSearch(space=xgb_space, metric="weighted_score", mode="min",  random_state=config.rand_state)
# algo = ConcurrencyLimiter(algo, max_concurrent=4)

tuner = tune.Tuner(
    obj_func,
    run_config=air.RunConfig(
      name=config.create_study_name(),
      #stop={"training_iteration": 1 if args.smoke_test else 10},
    ),
    tune_config=tune.TuneConfig(
        search_alg=bayesopt,
        num_samples=n_trials,
        #checkpoint_dir=None,
    ),
#            param_space=params,
)

results = tuner.fit()
best_result = results.get_best_result()

print('############################')
print("Best hyperparameters: ", best_result)
print("Best hyperparameters: ", best_result.config)

0,1
Current time:,2022-11-24 12:36:35
Running for:,00:00:16.21
Memory:,4.0/15.4 GiB

Trial name,# failures,error file
lambda_e2d4d7ee,1,/home/wasif/ray_results/lambda_2022-11-24_12-36-18/lambda_e2d4d7ee_1_lambda=0.5493_2022-11-24_12-36-18/error.txt
lambda_e42bd41c,1,/home/wasif/ray_results/lambda_2022-11-24_12-36-18/lambda_e42bd41c_2_lambda=0.7155_2022-11-24_12-36-21/error.txt
lambda_e432e428,1,/home/wasif/ray_results/lambda_2022-11-24_12-36-18/lambda_e432e428_3_lambda=0.6032_2022-11-24_12-36-21/error.txt
lambda_e43a1db0,1,/home/wasif/ray_results/lambda_2022-11-24_12-36-18/lambda_e43a1db0_4_lambda=0.5453_2022-11-24_12-36-21/error.txt
lambda_e4419180,1,/home/wasif/ray_results/lambda_2022-11-24_12-36-18/lambda_e4419180_5_lambda=0.4242_2022-11-24_12-36-22/error.txt
lambda_e4d4e502,1,/home/wasif/ray_results/lambda_2022-11-24_12-36-18/lambda_e4d4e502_6_lambda=0.6462_2022-11-24_12-36-26/error.txt
lambda_e7a7dec4,1,/home/wasif/ray_results/lambda_2022-11-24_12-36-18/lambda_e7a7dec4_7_lambda=0.4381_2022-11-24_12-36-26/error.txt
lambda_e7af0712,1,/home/wasif/ray_results/lambda_2022-11-24_12-36-18/lambda_e7af0712_8_lambda=0.8919_2022-11-24_12-36-26/error.txt
lambda_e7b63fe6,1,/home/wasif/ray_results/lambda_2022-11-24_12-36-18/lambda_e7b63fe6_9_lambda=0.9637_2022-11-24_12-36-27/error.txt
lambda_e7bdc09a,1,/home/wasif/ray_results/lambda_2022-11-24_12-36-18/lambda_e7bdc09a_10_lambda=0.3841_2022-11-24_12-36-32/error.txt

Trial name,status,loc,lambda
lambda_e2d4d7ee,ERROR,10.10.100.184:25353,0.549265
lambda_e42bd41c,ERROR,10.10.100.184:25381,0.715474
lambda_e432e428,ERROR,10.10.100.184:25383,0.603161
lambda_e43a1db0,ERROR,10.10.100.184:25385,0.545338
lambda_e4419180,ERROR,10.10.100.184:25396,0.424231
lambda_e4d4e502,ERROR,10.10.100.184:25529,0.646248
lambda_e7a7dec4,ERROR,10.10.100.184:25531,0.43815
lambda_e7af0712,ERROR,10.10.100.184:25535,0.891881
lambda_e7b63fe6,ERROR,10.10.100.184:25537,0.963699
lambda_e7bdc09a,ERROR,10.10.100.184:25678,0.384058


2022-11-24 12:36:21,302	ERROR trial_runner.py:993 -- Trial lambda_e2d4d7ee: Error processing event.
ray.exceptions.RayTaskError(NameError): [36mray::ImplicitFunc.train()[39m (pid=25353, ip=10.10.100.184, repr=<lambda>)
  File "/home/conda/admin/807c952d1ebb2e498414dc4b80a90b96c80ef2ef433f07c7a7381b5e38e2b5fa-20221110-224801-275852-64-wasif-dev-ray/lib/python3.9/site-packages/ray/tune/trainable/trainable.py", line 355, in train
    raise skipped from exception_cause(skipped)
  File "/home/conda/admin/807c952d1ebb2e498414dc4b80a90b96c80ef2ef433f07c7a7381b5e38e2b5fa-20221110-224801-275852-64-wasif-dev-ray/lib/python3.9/site-packages/ray/tune/trainable/function_trainable.py", line 325, in entrypoint
    return self._trainable_func(
  File "/home/conda/admin/807c952d1ebb2e498414dc4b80a90b96c80ef2ef433f07c7a7381b5e38e2b5fa-20221110-224801-275852-64-wasif-dev-ray/lib/python3.9/site-packages/ray/tune/trainable/function_trainable.py", line 651, in _trainable_func
    output = fn()
  File "/tm

Trial name,date,experiment_id,hostname,node_ip,pid,timestamp,trial_id
lambda_e2d4d7ee,2022-11-24_12-36-21,0cd72475d6574d8192088d5d66bdb59d,jupyter-wasif,10.10.100.184,25353,1669314981,e2d4d7ee
lambda_e42bd41c,2022-11-24_12-36-24,346218f28c3a4a9ab0c4e1b6f9fecdd1,jupyter-wasif,10.10.100.184,25381,1669314984,e42bd41c
lambda_e432e428,2022-11-24_12-36-24,36e672763b7747be8653ba13188fbce7,jupyter-wasif,10.10.100.184,25383,1669314984,e432e428
lambda_e43a1db0,2022-11-24_12-36-24,834b488085b04058acc9b80f32689386,jupyter-wasif,10.10.100.184,25385,1669314984,e43a1db0
lambda_e4419180,2022-11-24_12-36-24,9d7d1a58609f4b458a32611c656b5540,jupyter-wasif,10.10.100.184,25396,1669314984,e4419180
lambda_e4d4e502,2022-11-24_12-36-30,9489826951944ff49e10fd829137442b,jupyter-wasif,10.10.100.184,25529,1669314990,e4d4e502
lambda_e7a7dec4,2022-11-24_12-36-30,36886f1488a34d22b1905396d7aadf08,jupyter-wasif,10.10.100.184,25531,1669314990,e7a7dec4
lambda_e7af0712,2022-11-24_12-36-30,c466d8c6561d4fec9eaf2093a8f8600f,jupyter-wasif,10.10.100.184,25535,1669314990,e7af0712
lambda_e7b63fe6,2022-11-24_12-36-30,48c50e3321e3468482365ff6c3acd1c5,jupyter-wasif,10.10.100.184,25537,1669314990,e7b63fe6
lambda_e7bdc09a,2022-11-24_12-36-34,35edf671c24947a1a2336c6a408a2d0a,jupyter-wasif,10.10.100.184,25678,1669314994,e7bdc09a


2022-11-24 12:36:24,213	ERROR trial_runner.py:993 -- Trial lambda_e42bd41c: Error processing event.
ray.exceptions.RayTaskError(NameError): [36mray::ImplicitFunc.train()[39m (pid=25381, ip=10.10.100.184, repr=<lambda>)
  File "/home/conda/admin/807c952d1ebb2e498414dc4b80a90b96c80ef2ef433f07c7a7381b5e38e2b5fa-20221110-224801-275852-64-wasif-dev-ray/lib/python3.9/site-packages/ray/tune/trainable/trainable.py", line 355, in train
    raise skipped from exception_cause(skipped)
  File "/home/conda/admin/807c952d1ebb2e498414dc4b80a90b96c80ef2ef433f07c7a7381b5e38e2b5fa-20221110-224801-275852-64-wasif-dev-ray/lib/python3.9/site-packages/ray/tune/trainable/function_trainable.py", line 325, in entrypoint
    return self._trainable_func(
  File "/home/conda/admin/807c952d1ebb2e498414dc4b80a90b96c80ef2ef433f07c7a7381b5e38e2b5fa-20221110-224801-275852-64-wasif-dev-ray/lib/python3.9/site-packages/ray/tune/trainable/function_trainable.py", line 651, in _trainable_func
    output = fn()
  File "/tm

ValueError: No metric is provided. Either pass in a `metric` arg to `get_best_result` or specify a metric in the `TuneConfig` of your `Tuner`.

In [15]:
from ray import tune

# 1. Define an objective function.
def objective(config):
    score = config["a"] ** 2 + config["b"]
    return {"score": score}


# 2. Define a search space.
search_space = {
    "a": tune.grid_search([0.001, 0.01, 0.1, 1.0]),
    "b": tune.choice([1, 2, 3]),
}

# 3. Start a Tune run and print the best result.
tuner = tune.Tuner(objective, param_space=search_space)
results = tuner.fit()
print(results.get_best_result(metric="score", mode="min").config)



[2m[36m(TunerInternal pid=363)[0m == Status ==
[2m[36m(TunerInternal pid=363)[0m Current time: 2022-11-24 07:25:46 (running for 00:00:02.08)
[2m[36m(TunerInternal pid=363)[0m Memory usage on this node: 2.7/15.4 GiB 
[2m[36m(TunerInternal pid=363)[0m Using FIFO scheduling algorithm.
[2m[36m(TunerInternal pid=363)[0m Resources requested: 1.0/5 CPUs, 0/0 GPUs, 0.0/16.76 GiB heap, 0.0/4.91 GiB objects
[2m[36m(TunerInternal pid=363)[0m Result logdir: /home/ray/ray_results/objective_2022-11-24_07-25-40
[2m[36m(TunerInternal pid=363)[0m Number of trials: 4/4 (3 PENDING, 1 RUNNING)
[2m[36m(TunerInternal pid=363)[0m +-----------------------+----------+-----------------+-------+-----+
[2m[36m(TunerInternal pid=363)[0m | Trial name            | status   | loc             |     a |   b |
[2m[36m(TunerInternal pid=363)[0m |-----------------------+----------+-----------------+-------+-----|
[2m[36m(TunerInternal pid=363)[0m | objective_414eb_00000 | RUNNING  | 10.10.

[2m[36m(TunerInternal pid=363)[0m 2022-11-24 07:25:46,471	INFO tune.py:777 -- Total run time: 5.53 seconds (2.23 seconds for the tuning loop).
[2m[36m(worker pid=138, ip=10.10.63.163)[0m   from ray.tune.suggest.optuna import OptunaSearch
[2m[36m(worker pid=138, ip=10.10.63.163)[0m 2022-11-24T07:25:52PST : INFO : font_manager : _load_fontmanager : 1633 : Message : generated new fontManager
[2m[36m(worker pid=138, ip=10.10.63.163)[0m   from ray.tune.suggest.optuna import OptunaSearch
[2m[36m(worker pid=138, ip=10.10.63.163)[0m 2022-11-24T07:25:52PST : INFO : brisk_bagging : __discover_model__ : 86 : Message : brisk_xgb2: Starting training for trials:300, n_estimators  100
[2m[36m(worker pid=138, ip=10.10.63.163)[0m [32m[I 2022-11-24 07:25:52,635][0m A new study created in memory with name: optuna[0m


In [16]:
from ray import tune
from ray.tune.search.bayesopt import BayesOptSearch

space = {
    'width': (0, 20),
    'height': (-100, 100),
}
bayesopt = BayesOptSearch(space, metric="mean_loss", mode="min")
tuner = tune.Tuner(
    objective,
    tune_config=tune.TuneConfig(
        search_alg=bayesopt,
    ),
)
tuner.fit()



[2m[36m(TunerInternal pid=482)[0m == Status ==
[2m[36m(TunerInternal pid=482)[0m Current time: 2022-11-24 07:27:54 (running for 00:00:01.37)
[2m[36m(TunerInternal pid=482)[0m Memory usage on this node: 2.8/15.4 GiB 
[2m[36m(TunerInternal pid=482)[0m Using FIFO scheduling algorithm.
[2m[36m(TunerInternal pid=482)[0m Resources requested: 1.0/5 CPUs, 0/0 GPUs, 0.0/16.76 GiB heap, 0.0/4.91 GiB objects
[2m[36m(TunerInternal pid=482)[0m Result logdir: /home/ray/ray_results/objective_2022-11-24_07-27-52
[2m[36m(TunerInternal pid=482)[0m Number of trials: 1/1 (1 RUNNING)
[2m[36m(TunerInternal pid=482)[0m +--------------------+----------+------------------+----------+---------+
[2m[36m(TunerInternal pid=482)[0m | Trial name         | status   | loc              |   height |   width |
[2m[36m(TunerInternal pid=482)[0m |--------------------+----------+------------------+----------+---------|
[2m[36m(TunerInternal pid=482)[0m | objective_901d919a | RUNNING  | 10.10

[2m[36m(TunerInternal pid=482)[0m 2022-11-24 07:27:54,535	ERROR trial_runner.py:993 -- Trial objective_901d919a: Error processing event.
[2m[36m(TunerInternal pid=482)[0m ray.exceptions.RayTaskError(KeyError): [36mray::ImplicitFunc.train()[39m (pid=198, ip=10.10.63.163, repr=objective)
[2m[36m(TunerInternal pid=482)[0m   File "/home/ray/anaconda3/lib/python3.9/site-packages/ray/tune/trainable/trainable.py", line 355, in train
[2m[36m(TunerInternal pid=482)[0m     raise skipped from exception_cause(skipped)
[2m[36m(TunerInternal pid=482)[0m   File "/home/ray/anaconda3/lib/python3.9/site-packages/ray/tune/trainable/function_trainable.py", line 325, in entrypoint
[2m[36m(TunerInternal pid=482)[0m     return self._trainable_func(
[2m[36m(TunerInternal pid=482)[0m   File "/home/ray/anaconda3/lib/python3.9/site-packages/ray/tune/trainable/function_trainable.py", line 651, in _trainable_func
[2m[36m(TunerInternal pid=482)[0m     output = fn()
[2m[36m(TunerInternal 

<ray.tune.result_grid.ResultGrid at 0x7fbdc0e57c40>

[2m[36m(TunerInternal pid=482)[0m 2022-11-24 07:27:54,643	ERROR tune.py:773 -- Trials did not complete: [objective_901d919a]
[2m[36m(TunerInternal pid=482)[0m 2022-11-24 07:27:54,643	INFO tune.py:777 -- Total run time: 2.55 seconds (1.38 seconds for the tuning loop).
