In [1]:
# Third-party: tuning stack, search-space DSL, model and CV scoring.
import numpy as np
import ray
from hyperopt import hp
from hyperopt.pyll.base import scope
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.hyperopt import HyperOptSearch
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score

# Project-local data loader.
from data_and_regressor import get_data

# Load the dataset once at module level; the objective below reads X and y.
X, y, n_features = get_data()

In [2]:
@scope.define
def to_int(x):
    """Map a drawn value of 0 to 1 and pass every other value through.

    Registered on hyperopt's ``scope`` so it can wrap a search-space
    expression. Despite the name, no integer cast is performed: a non-zero
    ``x`` is returned exactly as received.
    """
    if x == 0:
        return 1
    return x

In [3]:
@scope.define
def to_abs(x):
    """Return the absolute value of ``x``.

    Registered on hyperopt's ``scope`` so it can wrap a search-space
    expression.
    """
    magnitude = abs(x)
    return magnitude

In [4]:
# Search space handed to HyperOptSearch.
#
# hp.randint(label, upper) draws an integer from [0, upper); scope.to_int then
# maps a drawn 0 to 1 so the wrapped hyperparameters are never zero.
# NOTE: hp.randint has been reported not to work in some environments.
space = {
    "learning_rate": scope.to_abs(hp.uniform("learning_rate", 10 ** -5, 1)),
    "max_depth": scope.to_int(hp.randint("max_depth", 5)),
    "max_features": scope.to_int(hp.randint("max_features", 13)),
    # scikit-learn rejects an integer min_samples_split below 2, so the draw
    # is shifted to cover 2..99 instead of being clamped to 1 like the others.
    "min_samples_split": hp.randint("min_samples_split", 98) + 2,
    "min_samples_leaf": scope.to_int(hp.randint("min_samples_leaf", 100)),
}

In [5]:
def objective(params, reporter):
    """
        Function to minimize: the 5-fold cross-validated mean absolute error
        of a GradientBoostingRegressor configured with ``params``.

        params:   dict of GradientBoostingRegressor keyword parameters, as
                  sampled from the search space.
        reporter: callable provided by Ray Tune; invoked once with the
                  keyword ``loss``.

        Reads the module-level ``X`` and ``y``.
    """
    reg = GradientBoostingRegressor(n_estimators=50, random_state=0)
    reg.set_params(**params)

    # Run the folds serially: Tune already executes trials in parallel, and
    # n_jobs=-1 here would start an extra pool of worker processes inside
    # every trial, multiplying CPU and memory use per concurrent trial.
    fold_scores = cross_val_score(
        reg, X, y, cv=5, n_jobs=1, scoring="neg_mean_absolute_error"
    )

    # The scorer yields negated MAE per fold; flip the sign to get a loss.
    reporter(loss=-np.mean(fold_scores))

In [6]:
# Start Ray with 10**9 bytes for worker memory and 10**9 bytes for the object
# store. ignore_reinit_error makes the cell safe to re-run on a live kernel
# where Ray is already initialised, instead of raising.
ray.init(object_store_memory=10**9, memory=10**9, ignore_reinit_error=True)

2020-04-24 03:32:13,950	INFO resource_spec.py:212 -- Starting Ray with 0.93 GiB memory available for workers and up to 0.93 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-04-24 03:32:14,379	INFO services.py:1148 -- View the Ray dashboard at [1m[32mlocalhost:8265[39m[22m


{'node_ip_address': '10.16.33.136',
 'redis_address': '10.16.33.136:57689',
 'object_store_address': '/tmp/ray/session_2020-04-24_03-32-13_949497_1117/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2020-04-24_03-32-13_949497_1117/sockets/raylet',
 'webui_url': 'localhost:8265',
 'session_dir': '/tmp/ray/session_2020-04-24_03-32-13_949497_1117'}

In [None]:
# Extra keyword arguments for tune.run: how many configurations to sample.
config = dict(num_samples=50)

# Both the scheduler and the search algorithm minimise the reported "loss".
scheduler = AsyncHyperBandScheduler(metric="loss", mode="min")
algo = HyperOptSearch(space, max_concurrent=2, metric="loss", mode="min")

analysis = tune.run(
    objective,
    search_alg=algo,
    scheduler=scheduler,
    num_samples=config["num_samples"],
)

Trial name,status,loc,learning_rate,max_depth,max_features,min_samples_leaf,min_samples_split
objective_311001a6,RUNNING,,0.543695,2,3,20,20
objective_31122a6c,PENDING,,0.962971,3,3,1,25


[2m[36m(pid=1166)[0m 2020-04-24 03:32:15,821	INFO trainable.py:217 -- Getting current IP.
[2m[36m(pid=1168)[0m 2020-04-24 03:32:15,863	INFO trainable.py:217 -- Getting current IP.
Result for objective_311001a6:
  date: 2020-04-24_03-32-17
  done: false
  experiment_id: 1a8162e7e8954ca9a301e51530b5fb15
  experiment_tag: 1_learning_rate=0.5437,max_depth=2,max_features=3,min_samples_leaf=20,min_samples_split=20
  hostname: ip-10-16-33-136.ec2.internal
  iterations_since_restore: 1
  loss: 3.6410054356730184
  node_ip: 10.16.33.136
  pid: 1166
  time_since_restore: 1.2209439277648926
  time_this_iter_s: 1.2209439277648926
  time_total_s: 1.2209439277648926
  timestamp: 1587699137
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 311001a6
  
Result for objective_31122a6c:
  date: 2020-04-24_03-32-17
  done: true
  experiment_id: 308c7d38146748ce9b5c304329d9c8d6
  experiment_tag: 2_learning_rate=0.96297,max_depth=3,max_features=3,min_samples_leaf=1,min_samples_split=25
 

Trial name,status,loc,learning_rate,max_depth,max_features,min_samples_leaf,min_samples_split,iter,total time (s)
objective_311001a6,TERMINATED,,0.543695,2,3,20,20,1.0,1.22094
objective_31122a6c,TERMINATED,,0.962971,3,3,1,25,1.0,1.37317
objective_3114f88c,TERMINATED,,0.106824,4,11,70,18,1.0,1.23572
objective_324130ea,TERMINATED,,0.532924,2,1,95,67,1.0,1.30629
objective_325de1c2,RUNNING,10.16.33.136:1165,0.102684,2,8,23,80,1.0,1.24559
objective_336c880c,RUNNING,,0.770713,3,4,72,48,,


Result for objective_336c880c:
  date: 2020-04-24_03-32-21
  done: true
  experiment_id: b2e2c3a1ba844670aa99008dc626b0fb
  experiment_tag: 6_learning_rate=0.77071,max_depth=3,max_features=4,min_samples_leaf=72,min_samples_split=48
  hostname: ip-10-16-33-136.ec2.internal
  iterations_since_restore: 1
  loss: 4.156041871821632
  node_ip: 10.16.33.136
  pid: 1171
  time_since_restore: 1.3430125713348389
  time_this_iter_s: 1.3430125713348389
  time_total_s: 1.3430125713348389
  timestamp: 1587699141
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 336c880c
  
[2m[36m(pid=1167)[0m 2020-04-24 03:32:21,765	INFO trainable.py:217 -- Getting current IP.
[2m[36m(pid=1164)[0m 2020-04-24 03:32:21,965	INFO trainable.py:217 -- Getting current IP.
Result for objective_33949018:
  date: 2020-04-24_03-32-23
  done: true
  experiment_id: 4233645155e74977ba4b2a4c3c1e6b4c
  experiment_tag: 7_learning_rate=0.53862,max_depth=2,max_features=8,min_samples_leaf=44,min_samples_split=9
  

Trial name,status,loc,learning_rate,max_depth,max_features,min_samples_leaf,min_samples_split,iter,total time (s)
objective_311001a6,TERMINATED,,0.543695,2,3,20,20,1.0,1.22094
objective_31122a6c,TERMINATED,,0.962971,3,3,1,25,1.0,1.37317
objective_3114f88c,TERMINATED,,0.106824,4,11,70,18,1.0,1.23572
objective_324130ea,TERMINATED,,0.532924,2,1,95,67,1.0,1.30629
objective_325de1c2,TERMINATED,,0.102684,2,8,23,80,1.0,1.24559
objective_336c880c,TERMINATED,,0.770713,3,4,72,48,1.0,1.34301
objective_33949018,TERMINATED,,0.538621,2,8,44,9,1.0,1.34115
objective_34a181fa,TERMINATED,,0.430157,1,11,58,10,1.0,1.24071
objective_34c6ae58,TERMINATED,,0.0764977,1,10,33,36,1.0,1.34504
objective_35e0974a,TERMINATED,,0.373454,1,1,41,51,1.0,1.33257


Result for objective_376ff786:
  date: 2020-04-24_03-32-27
  done: true
  experiment_id: 7b730c7cd5114f0b97e0498c73fef175
  experiment_tag: 12_learning_rate=0.88566,max_depth=4,max_features=5,min_samples_leaf=84,min_samples_split=23
  hostname: ip-10-16-33-136.ec2.internal
  iterations_since_restore: 1
  loss: 4.221668429970091
  node_ip: 10.16.33.136
  pid: 1459
  time_since_restore: 1.2649900913238525
  time_this_iter_s: 1.2649900913238525
  time_total_s: 1.2649900913238525
  timestamp: 1587699147
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 376ff786
  
[2m[36m(pid=1460)[0m 2020-04-24 03:32:28,568	INFO trainable.py:217 -- Getting current IP.
[2m[36m(pid=1613)[0m 2020-04-24 03:32:29,113	INFO trainable.py:217 -- Getting current IP.
Result for objective_37796d34:
  date: 2020-04-24_03-32-29
  done: true
  experiment_id: 16489cc6b89240499179daf26e7091fc
  experiment_tag: 13_learning_rate=0.77371,max_depth=4,max_features=5,min_samples_leaf=98,min_samples_split=81

Trial name,status,loc,learning_rate,max_depth,max_features,min_samples_leaf,min_samples_split,iter,total time (s)
objective_311001a6,TERMINATED,,0.543695,2,3,20,20,1.0,1.22094
objective_31122a6c,TERMINATED,,0.962971,3,3,1,25,1.0,1.37317
objective_3114f88c,TERMINATED,,0.106824,4,11,70,18,1.0,1.23572
objective_324130ea,TERMINATED,,0.532924,2,1,95,67,1.0,1.30629
objective_325de1c2,TERMINATED,,0.102684,2,8,23,80,1.0,1.24559
objective_336c880c,TERMINATED,,0.770713,3,4,72,48,1.0,1.34301
objective_33949018,TERMINATED,,0.538621,2,8,44,9,1.0,1.34115
objective_34a181fa,TERMINATED,,0.430157,1,11,58,10,1.0,1.24071
objective_34c6ae58,TERMINATED,,0.0764977,1,10,33,36,1.0,1.34504
objective_35e0974a,TERMINATED,,0.373454,1,1,41,51,1.0,1.33257


Result for objective_3b495f5a:
  date: 2020-04-24_03-32-34
  done: true
  experiment_id: a628c1d122654747b452e7fc8bdf825b
  experiment_tag: 18_learning_rate=0.89851,max_depth=1,max_features=2,min_samples_leaf=28,min_samples_split=54
  hostname: ip-10-16-33-136.ec2.internal
  iterations_since_restore: 1
  loss: 3.6948824373605276
  node_ip: 10.16.33.136
  pid: 1672
  time_since_restore: 1.0257251262664795
  time_this_iter_s: 1.0257251262664795
  time_total_s: 1.0257251262664795
  timestamp: 1587699154
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 3b495f5a
  
[2m[36m(pid=1671)[0m 2020-04-24 03:32:34,981	INFO trainable.py:217 -- Getting current IP.
Result for objective_3b954622:
  date: 2020-04-24_03-32-35
  done: false
  experiment_id: b045ae73ea5349978608f1a75d7f2a0c
  experiment_tag: 19_learning_rate=0.14921,max_depth=1,max_features=9,min_samples_leaf=3,min_samples_split=6
  hostname: ip-10-16-33-136.ec2.internal
  iterations_since_restore: 1
  loss: 3.31158365759

2020-04-24 03:32:36,113	ERROR trial_runner.py:521 -- Trial objective_3c73fe12: Error processing event.
Traceback (most recent call last):
  File "/home/leninkumar.vijay/miniconda3/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 467, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/leninkumar.vijay/miniconda3/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 381, in fetch_result
    result = ray.get(trial_future[0], DEFAULT_GET_TIMEOUT)
  File "/home/leninkumar.vijay/miniconda3/lib/python3.6/site-packages/ray/worker.py", line 1513, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RayOutOfMemoryError): [36mray::IDLE[39m (pid=1815, ip=10.16.33.136)
  File "python/ray/_raylet.pyx", line 414, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 415, in ray._raylet.execute_task
  File "/home/leninkumar.vijay/miniconda3/lib/python3.6/site-packages/ray/memory_monitor.py", line 120, in raise_i

In [None]:
# Tabulate the tuning run; the plotting cell below reads its `loss` column.
result_df = analysis.dataframe()

In [None]:
import matplotlib.pyplot as plt
f, ax = plt.subplots(1)#, figsize=(10,10))

# Convergence curve: the lowest loss among the first n rows of result_df, so
# the curve matches the y-axis label (the raw per-trial loss does not).
# NOTE(review): assumes the rows of result_df are in trial order - confirm.
ys = result_df.loss.cummin()
xs = range(1, len(ys) + 1)  # "after n calls" starts counting at 1
ax.plot(xs, ys, color='green', marker='o', linestyle='dashed', linewidth=2, markersize=2)

ax.set_xlabel('No of calls', fontsize=12)
ax.set_ylabel('min(x) after n calls', fontsize=12);

In [None]:
# Shut Ray down once the analysis is finished; this pairs with the ray.init()
# call earlier in the notebook.
ray.shutdown()