In [1]:
import warnings
# Suppress every Python warning for the rest of the session. This is installed
# before the remaining imports so that warnings emitted while importing them
# are hidden as well.
warnings.filterwarnings('ignore')
import numpy as np

In [2]:
import ray
from ray.rllib import agents
# ignore_reinit_error=True makes this cell safe to re-run: if Ray is already
# running in this kernel the call is a no-op instead of raising.
# log_to_driver=False keeps worker stdout/stderr out of the notebook output.
ray.init(ignore_reinit_error=True, log_to_driver=False)

from envs.point_mass_env import PointMassEnv 
from ray.tune.logger import pretty_print

Instructions for updating:
non-resource variables are not supported in the long term


2021-04-09 22:43:28,380	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8267[39m[22m
No module named 'flow'
No module named 'carla'


In [3]:
from ray.tune.registry import register_env

def env_creator(env_config):
    """Build the point-mass environment that RLlib trains on.

    Args:
        env_config: The ``env_config`` mapping RLlib hands to env creators.
            An optional ``'env_name'`` entry overrides the name string passed
            to ``PointMassEnv``; when it is absent (or ``env_config`` is
            empty / ``None``) ``'maze2d-open-dense-v0'`` is used, exactly as
            before.

    Returns:
        A new ``PointMassEnv`` instance.
    """
    env_name = (env_config or {}).get('env_name', 'maze2d-open-dense-v0')
    return PointMassEnv(env_name)


# Register the creator under the id the trainers pass as `env='point_mass_1'`.
register_env("point_mass_1", env_creator)

In [4]:
from ray.rllib.agents.callbacks import DefaultCallbacks

class CustomCallbacks(DefaultCallbacks):
    """RLlib callbacks that log per-episode success and final distance.

    The values are written to ``episode.custom_metrics``; the training cells
    read them back from the results as ``success_mean`` and ``dist_mean``.
    """

    def on_episode_end(self, worker, base_env,
                       policies, episode,
                       **kwargs):
        """Record ``'success'`` and ``'dist'`` for the episode that just ended.

        ``'success'`` is the integer value of the ``'success'`` entry of the
        last info dict (0 when the info dict is missing or has no such entry).
        ``'dist'`` is the Euclidean norm of ``obs[:2] - obs[4:6]`` for the last
        observation.
        """
        # The last info can be None/empty; a metrics callback should not abort
        # training because of that, so treat it as "no success reported".
        info = episode.last_info_for() or {}
        success = int(info.get('success', False))

        # np.asarray keeps the slicing/subtraction valid even if the
        # observation arrives as a plain sequence.
        last_obs = np.asarray(episode.last_observation_for())
        pos = last_obs[:2]  # presumably the agent's (x, y) -- TODO confirm against PointMassEnv
        target = last_obs[4:6]  # presumably the goal's (x, y) -- TODO confirm against PointMassEnv
        dist = float(np.linalg.norm(pos - target))

        episode.custom_metrics['dist'] = dist
        episode.custom_metrics["success"] = success

# Their Model, their HParams

In [5]:
# "Their" model: a 128x128 fully connected net with train_batch_size=1000.
# Every PPO setting not listed here is left unset, so the trainer falls back
# to its own defaults.
config = {
    'num_workers': 1,
    'train_batch_size': 1000,
    'log_level': 'ERROR',
    'framework': 'torch',
    'callbacks': CustomCallbacks,
    'model': {
        'fcnet_hiddens': [128, 128],
    },
}
trainer = agents.ppo.PPOTrainer(env='point_mass_1', config=config)
try:
    # Each train() call is one training iteration; the log line keeps the
    # historical "episode" label so it stays comparable with earlier outputs.
    for i in range(1000):
        results = trainer.train()
        # The custom metrics may be missing if no episode has finished yet;
        # report NaN in that case instead of raising KeyError.
        custom = results['custom_metrics']
        success_mean = custom.get('success_mean', float('nan'))
        dist_mean = custom.get('dist_mean', float('nan'))
        print(f"episode {i}, mean rew {results['episode_reward_mean']}," +
              f"success_mean {success_mean}, dist {dist_mean}")
finally:
    # Release the trainer's workers even on KeyboardInterrupt or an error, so
    # re-running training cells does not leave old workers behind.
    trainer.stop()

RaySystemError: System error: Ray has not been started yet. You can start Ray with 'ray.init()'.

# Our Model, our HParams

In [10]:
# "Our" model: a 64x64 fully connected net trained with "our" PPO
# hyperparameters (small batch, low learning rate, explicit clipping).
config = {
    'num_workers': 1,
    'train_batch_size': 200,
    'log_level': 'ERROR',
    'framework': 'torch',
    'callbacks': CustomCallbacks,
    'lambda': .99,
    'num_sgd_iter': 4,
    'lr': 1e-5,
    'vf_loss_coeff': .05,
    'entropy_coeff': .01,
    'clip_param': .2,
    'vf_clip_param': .2,
    'grad_clip': .5,
    'model': {
        'fcnet_hiddens': [64, 64],
    },
}
trainer = agents.ppo.PPOTrainer(env='point_mass_1', config=config)
try:
    # Each train() call is one training iteration; the log line keeps the
    # historical "episode" label so it stays comparable with earlier outputs.
    for i in range(1000):
        results = trainer.train()
        # The custom metrics may be missing if no episode has finished yet;
        # report NaN in that case instead of raising KeyError.
        custom = results['custom_metrics']
        success_mean = custom.get('success_mean', float('nan'))
        dist_mean = custom.get('dist_mean', float('nan'))
        print(f"episode {i}, mean rew {results['episode_reward_mean']}," +
              f"success_mean {success_mean}, dist {dist_mean}")
finally:
    # Release the trainer's workers even on KeyboardInterrupt or an error, so
    # re-running training cells does not leave old workers behind.
    trainer.stop()



episode 0, mean rew 1.0692866670458385,success_mean 0.0, dist 1.9715892846976069
episode 1, mean rew 0.8894788660171702,success_mean 0.0, dist 1.7873492818147683
episode 2, mean rew 0.7707202149347199,success_mean 0.0, dist 1.747219061249293
episode 3, mean rew 1.4870071118250645,success_mean 0.0, dist 1.6146722070907749
episode 4, mean rew 1.4751989947015547,success_mean 0.0, dist 1.6230163641327033
episode 5, mean rew 1.3336213128305683,success_mean 0.0, dist 1.5965751981046032
episode 6, mean rew 1.216758760241136,success_mean 0.0, dist 1.6749183910915009
episode 7, mean rew 1.1032890394815946,success_mean 0.0, dist 1.6637052698397556
episode 8, mean rew 1.0496191374475825,success_mean 0.0, dist 1.6709231376646045
episode 9, mean rew 1.100692563938615,success_mean 0.0, dist 1.6377839572556991
episode 10, mean rew 1.0230912662509597,success_mean 0.0, dist 1.6849154009430667
episode 11, mean rew 1.0660560557408691,success_mean 0.0, dist 1.6120337411991557
episode 12, mean rew 1.002128

episode 101, mean rew 1.2461697698656606,success_mean 0.0, dist 1.5057621882110346
episode 102, mean rew 1.2452612304584156,success_mean 0.0, dist 1.515415764849753
episode 103, mean rew 1.246289680431168,success_mean 0.0, dist 1.5108534689326043
episode 104, mean rew 1.2626394432501244,success_mean 0.0, dist 1.508515509044965
episode 105, mean rew 1.260262873966135,success_mean 0.0, dist 1.5078802411763073
episode 106, mean rew 1.2559027805817002,success_mean 0.0, dist 1.514632267456452
episode 107, mean rew 1.3047084425947046,success_mean 0.0, dist 1.51176339255784
episode 108, mean rew 1.314726969193822,success_mean 0.0, dist 1.5219233772410072
episode 109, mean rew 1.3116824226067294,success_mean 0.0, dist 1.5235500902645105
episode 110, mean rew 1.3028018296585038,success_mean 0.0, dist 1.5265747097833091
episode 111, mean rew 1.3131203781149978,success_mean 0.0, dist 1.518290222611409
episode 112, mean rew 1.3235313418206311,success_mean 0.0, dist 1.52616385670905
episode 113, me

episode 201, mean rew 1.6848208974493502,success_mean 0.0, dist 1.8203422107948564
episode 202, mean rew 1.6861296814105555,success_mean 0.0, dist 1.7989577358904179
episode 203, mean rew 1.6913557042115572,success_mean 0.0, dist 1.8080467853670985
episode 204, mean rew 1.6949922621009728,success_mean 0.0, dist 1.8252827901678563
episode 205, mean rew 1.6951911623208402,success_mean 0.0, dist 1.823357407097734
episode 206, mean rew 1.6793156383480083,success_mean 0.0, dist 1.8341055962938657
episode 207, mean rew 1.6753047729473738,success_mean 0.0, dist 1.836161191289773
episode 208, mean rew 1.6816033598580105,success_mean 0.0, dist 1.8523076478646785
episode 209, mean rew 1.6776109116419764,success_mean 0.0, dist 1.8771294519802206
episode 210, mean rew 1.670334391604258,success_mean 0.0, dist 1.8926127622147215
episode 211, mean rew 1.6751322143922434,success_mean 0.0, dist 1.9067347168883475
episode 212, mean rew 1.6827198039446147,success_mean 0.0, dist 1.9224638312448767
episode

episode 301, mean rew 1.735326896350896,success_mean 0.0, dist 2.2616406493463925
episode 302, mean rew 1.71279831691212,success_mean 0.0, dist 2.282541466787379
episode 303, mean rew 1.7203734005862206,success_mean 0.0, dist 2.29270567235481
episode 304, mean rew 1.7339406481722248,success_mean 0.0, dist 2.291837101359293
episode 305, mean rew 1.7370140549687418,success_mean 0.0, dist 2.304319831918282
episode 306, mean rew 1.7408194566251423,success_mean 0.0, dist 2.304994964568728
episode 307, mean rew 1.7422918560962275,success_mean 0.0, dist 2.3038252981972924
episode 308, mean rew 1.7256818326021184,success_mean 0.0, dist 2.3111561813576205
episode 309, mean rew 1.7273682818721243,success_mean 0.0, dist 2.3221491359412756
episode 310, mean rew 1.7405505454856458,success_mean 0.0, dist 2.3224438688791564
episode 311, mean rew 1.7522566680353635,success_mean 0.0, dist 2.3152238202545505
episode 312, mean rew 1.7591288029019017,success_mean 0.0, dist 2.3148139435892237
episode 313, 

KeyboardInterrupt: 

# Our Model, their HParams

In [None]:
# "Our" 64x64 model trained with "their" hyperparameters: only
# train_batch_size is set explicitly. lambda, num_sgd_iter, lr, vf_loss_coeff,
# entropy_coeff, clip_param, vf_clip_param and grad_clip are deliberately NOT
# overridden in this run (the "our" values used in the other cells are
# .99, 4, 1e-5, .05, .01, .2, .2 and .5 respectively).
config = {
    'num_workers': 1,
    'log_level': 'ERROR',
    'framework': 'torch',
    'callbacks': CustomCallbacks,
    'train_batch_size': 1000,
    'model': {
        'fcnet_hiddens': [64, 64],
    },
}
trainer = agents.ppo.PPOTrainer(env='point_mass_1', config=config)
try:
    # Each train() call is one training iteration; the log line keeps the
    # historical "episode" label so it stays comparable with earlier outputs.
    for i in range(1000):
        results = trainer.train()
        # The custom metrics may be missing if no episode has finished yet;
        # report NaN in that case instead of raising KeyError.
        custom = results['custom_metrics']
        success_mean = custom.get('success_mean', float('nan'))
        dist_mean = custom.get('dist_mean', float('nan'))
        print(f"episode {i}, mean rew {results['episode_reward_mean']}," +
              f"success_mean {success_mean}, dist {dist_mean}")
finally:
    # Release the trainer's workers even on KeyboardInterrupt or an error, so
    # re-running training cells does not leave old workers behind.
    trainer.stop()



episode 0, mean rew 0.6130870731157936,success_mean 0.0, dist 1.5921449389108202
episode 1, mean rew 1.2603985894650935,success_mean 0.0, dist 1.3978968519769315
episode 2, mean rew 1.6397175687765189,success_mean 0.0, dist 1.2829645914351056
episode 3, mean rew 1.8413079892639617,success_mean 0.0, dist 1.270840889907793
episode 4, mean rew 2.156830142168495,success_mean 0.0, dist 1.2729382458015874
episode 5, mean rew 2.3076811655311666,success_mean 0.0, dist 1.4099414971591138
episode 6, mean rew 2.4904063874259212,success_mean 0.0, dist 1.462403880433009
episode 7, mean rew 2.6058275301371445,success_mean 0.0, dist 1.5258415119505744
episode 8, mean rew 2.7845356533821137,success_mean 0.0, dist 1.5302406507837254
episode 9, mean rew 2.9226877111560543,success_mean 0.0, dist 1.509508168439038
episode 10, mean rew 3.0721563019944016,success_mean 0.0, dist 1.5143305709169286
episode 11, mean rew 3.2288280920816184,success_mean 0.0, dist 1.5142189454024064
episode 12, mean rew 3.3611624

# Their Model, our HParams

In [5]:
# "Their" 128x128 model trained with "our" PPO hyperparameters
# (small batch, low learning rate, explicit clipping).
config = {
    'num_workers': 1,
    'train_batch_size': 200,
    'log_level': 'ERROR',
    'framework': 'torch',
    'callbacks': CustomCallbacks,
    'lambda': .99,
    'num_sgd_iter': 4,
    'lr': 1e-5,
    'vf_loss_coeff': .05,
    'entropy_coeff': .01,
    'clip_param': .2,
    'vf_clip_param': .2,
    'grad_clip': .5,
    'model': {
        'fcnet_hiddens': [128, 128],
    },
}
trainer = agents.ppo.PPOTrainer(env='point_mass_1', config=config)
try:
    # Each train() call is one training iteration; the log line keeps the
    # historical "episode" label so it stays comparable with earlier outputs.
    for i in range(1000):
        results = trainer.train()
        # The custom metrics may be missing if no episode has finished yet;
        # report NaN in that case instead of raising KeyError.
        custom = results['custom_metrics']
        success_mean = custom.get('success_mean', float('nan'))
        dist_mean = custom.get('dist_mean', float('nan'))
        print(f"episode {i}, mean rew {results['episode_reward_mean']}," +
              f"success_mean {success_mean}, dist {dist_mean}")
finally:
    # Release the trainer's workers even on KeyboardInterrupt or an error, so
    # re-running training cells does not leave old workers behind.
    trainer.stop()

2021-04-09 22:43:54,709	INFO trainer.py:643 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


episode 0, mean rew -0.0042672618553116475,success_mean 0.0, dist 2.1171825753317055
episode 1, mean rew 1.5317670709560858,success_mean 0.0, dist 2.1168891402773533
episode 2, mean rew 0.9739439059429491,success_mean 0.0, dist 1.9867344200294526
episode 3, mean rew 0.8237294325591751,success_mean 0.0, dist 1.8542234287824317
episode 4, mean rew 0.7392553537863841,success_mean 0.0, dist 1.9621321681932928
episode 5, mean rew 0.6879680612319949,success_mean 0.0, dist 1.8686331072202176
episode 6, mean rew 0.6471540448907273,success_mean 0.0, dist 1.8828189734036525
episode 7, mean rew 0.6223198834984571,success_mean 0.0, dist 1.9148429112651197
episode 8, mean rew 0.5609702963733796,success_mean 0.0, dist 1.9498221809292697
episode 9, mean rew 0.5457376445191924,success_mean 0.0, dist 1.9496464271918301
episode 10, mean rew 0.5276753572721965,success_mean 0.0, dist 1.9644001709102592
episode 11, mean rew 0.6260047230030841,success_mean 0.0, dist 1.8619265667816627
episode 12, mean rew 0

episode 101, mean rew 2.03805883288095,success_mean 0.0, dist 1.4326718034122674
episode 102, mean rew 2.0465902553308677,success_mean 0.0, dist 1.4425701936604058
episode 103, mean rew 2.0390998270790375,success_mean 0.0, dist 1.4463530936097984
episode 104, mean rew 2.0415901550173414,success_mean 0.0, dist 1.466597917731608
episode 105, mean rew 2.0687726722647612,success_mean 0.0, dist 1.4734228487301126
episode 106, mean rew 2.0939962515285613,success_mean 0.0, dist 1.4781209199826921
episode 107, mean rew 2.1139095339072127,success_mean 0.0, dist 1.4641704002921372
episode 108, mean rew 2.1132250754643063,success_mean 0.0, dist 1.4844154311268545
episode 109, mean rew 2.1280303235399796,success_mean 0.0, dist 1.4833678036011246
episode 110, mean rew 2.1210107444448103,success_mean 0.0, dist 1.4859147530418424
episode 111, mean rew 2.143188494957774,success_mean 0.0, dist 1.4787769118658556
episode 112, mean rew 2.149481793009953,success_mean 0.0, dist 1.504376194940434
episode 11

episode 201, mean rew 2.490302331781076,success_mean 0.0, dist 1.9498509315596035
episode 202, mean rew 2.4753532168725108,success_mean 0.0, dist 1.9526074051103932
episode 203, mean rew 2.4514987054803434,success_mean 0.0, dist 1.960125408662647
episode 204, mean rew 2.4477538549012148,success_mean 0.0, dist 1.9501082140516985
episode 205, mean rew 2.4423686042581134,success_mean 0.0, dist 1.9534608394269517
episode 206, mean rew 2.450574392766683,success_mean 0.0, dist 1.9544467430680155
episode 207, mean rew 2.4441594583480417,success_mean 0.0, dist 1.9508545287474564
episode 208, mean rew 2.436896290344578,success_mean 0.0, dist 1.9570077439326594
episode 209, mean rew 2.44213071049116,success_mean 0.0, dist 1.9458371851421492
episode 210, mean rew 2.442929650163508,success_mean 0.0, dist 1.9600426463706293
episode 211, mean rew 2.457484048587423,success_mean 0.0, dist 1.966885338546061
episode 212, mean rew 2.4748893065344655,success_mean 0.0, dist 1.9823214918416707
episode 213, 

episode 301, mean rew 2.8662081246261963,success_mean 0.0, dist 1.7041437631562235
episode 302, mean rew 2.9019564482797375,success_mean 0.0, dist 1.6937425664635253
episode 303, mean rew 2.9200072506673966,success_mean 0.0, dist 1.700389640105303
episode 304, mean rew 2.9520961737816656,success_mean 0.0, dist 1.6960740936425844
episode 305, mean rew 2.9842967600367065,success_mean 0.0, dist 1.6818117467068492
episode 306, mean rew 2.9914767575236403,success_mean 0.0, dist 1.7074555101029205
episode 307, mean rew 2.990142518893389,success_mean 0.0, dist 1.7079685427185303
episode 308, mean rew 2.9770000290875975,success_mean 0.0, dist 1.718628421839635
episode 309, mean rew 2.9893221988939267,success_mean 0.0, dist 1.7214112675452728
episode 310, mean rew 2.990709655277357,success_mean 0.0, dist 1.727488689157064
episode 311, mean rew 2.9889947641838277,success_mean 0.0, dist 1.7479412824370661
episode 312, mean rew 3.0081645456508768,success_mean 0.0, dist 1.7374858838134788
episode 3

episode 401, mean rew 3.3412829858694817,success_mean 0.0, dist 1.4311042505168725
episode 402, mean rew 3.340401623312373,success_mean 0.0, dist 1.4221485159440383
episode 403, mean rew 3.3553360937869843,success_mean 0.0, dist 1.4210553885855077
episode 404, mean rew 3.3862658237303584,success_mean 0.0, dist 1.4094546670428405
episode 405, mean rew 3.41266556411176,success_mean 0.0, dist 1.396818818208349
episode 406, mean rew 3.41083037911155,success_mean 0.0, dist 1.4027833936209761
episode 407, mean rew 3.395177023093952,success_mean 0.0, dist 1.412915058578149
episode 408, mean rew 3.407619511617723,success_mean 0.0, dist 1.4144976942516694
episode 409, mean rew 3.4074140754539606,success_mean 0.0, dist 1.4127292345034668
episode 410, mean rew 3.434559125366109,success_mean 0.0, dist 1.4055493298619235
episode 411, mean rew 3.4189292636134736,success_mean 0.0, dist 1.417013804167158
episode 412, mean rew 3.432837452209805,success_mean 0.0, dist 1.424998368302492
episode 413, mean

episode 501, mean rew 3.745252517480098,success_mean 0.0, dist 1.3312773970953884
episode 502, mean rew 3.7448705246913034,success_mean 0.0, dist 1.3305195425829508
episode 503, mean rew 3.752649382772869,success_mean 0.0, dist 1.331733709849274
episode 504, mean rew 3.7656834820539076,success_mean 0.0, dist 1.3253427000056923
episode 505, mean rew 3.7579496550096447,success_mean 0.0, dist 1.3297913434213258
episode 506, mean rew 3.755340515769406,success_mean 0.0, dist 1.3278631595936432
episode 507, mean rew 3.7602738224846646,success_mean 0.0, dist 1.3301892188773559
episode 508, mean rew 3.787213565080175,success_mean 0.0, dist 1.3186758583481861
episode 509, mean rew 3.802730523686159,success_mean 0.0, dist 1.31130189991483
episode 510, mean rew 3.814949212358707,success_mean 0.0, dist 1.3082386709144163
episode 511, mean rew 3.83440210719997,success_mean 0.0, dist 1.2986174907823178
episode 512, mean rew 3.8422458914741147,success_mean 0.0, dist 1.2865975005177064
episode 513, me

episode 601, mean rew 4.381311138120847,success_mean 0.01, dist 1.0651909724159763
episode 602, mean rew 4.403895954874594,success_mean 0.01, dist 1.053725152802171
episode 603, mean rew 4.409524642222482,success_mean 0.01, dist 1.0523590317295355
episode 604, mean rew 4.399907348152643,success_mean 0.01, dist 1.0552281867425988
episode 605, mean rew 4.402807643620443,success_mean 0.01, dist 1.0519614433025952
episode 606, mean rew 4.423718164230264,success_mean 0.01, dist 1.0495286446158865
episode 607, mean rew 4.415803011213523,success_mean 0.01, dist 1.052439164970461
episode 608, mean rew 4.421817372541345,success_mean 0.01, dist 1.05154082797362
episode 609, mean rew 4.430278472264695,success_mean 0.01, dist 1.0542027812257602
episode 610, mean rew 4.421449070532666,success_mean 0.01, dist 1.0568420907545106
episode 611, mean rew 4.415959107708588,success_mean 0.01, dist 1.0637916309391247
episode 612, mean rew 4.400466925783142,success_mean 0.01, dist 1.0624202306560684
episode 

episode 701, mean rew 4.3985017564374465,success_mean 0.0, dist 1.2730058281706165
episode 702, mean rew 4.421866142853625,success_mean 0.0, dist 1.2673649197625612
episode 703, mean rew 4.42486045546337,success_mean 0.0, dist 1.2633618005707634
episode 704, mean rew 4.4055246812764075,success_mean 0.0, dist 1.265169556898453
episode 705, mean rew 4.411972350373429,success_mean 0.0, dist 1.2587868945851453
episode 706, mean rew 4.416375434846638,success_mean 0.0, dist 1.2549608303191695
episode 707, mean rew 4.4235393260719045,success_mean 0.0, dist 1.252242135543464
episode 708, mean rew 4.426697593965391,success_mean 0.0, dist 1.249450413514027
episode 709, mean rew 4.428950656715838,success_mean 0.0, dist 1.2460015545496517
episode 710, mean rew 4.441121957501688,success_mean 0.0, dist 1.2443171847073582
episode 711, mean rew 4.447036027825729,success_mean 0.0, dist 1.2433092719840295
episode 712, mean rew 4.458736676525074,success_mean 0.0, dist 1.2434022232076918
episode 713, mean

episode 802, mean rew 4.859868427799929,success_mean 0.0, dist 1.0249450436197913
episode 803, mean rew 4.850994562373483,success_mean 0.0, dist 1.0269005513874887
episode 804, mean rew 4.833566368784205,success_mean 0.0, dist 1.030893772611
episode 805, mean rew 4.830721976967505,success_mean 0.0, dist 1.0301550574231089
episode 806, mean rew 4.81985421956478,success_mean 0.0, dist 1.0340214854326022
episode 807, mean rew 4.836550064875726,success_mean 0.0, dist 1.0300095217726541
episode 808, mean rew 4.83155833638941,success_mean 0.0, dist 1.0333425033615007
episode 809, mean rew 4.836780301391921,success_mean 0.0, dist 1.0347278449896693
episode 810, mean rew 4.8332195259848705,success_mean 0.0, dist 1.037166606145545
episode 811, mean rew 4.826495290977245,success_mean 0.0, dist 1.0393153521421084
episode 812, mean rew 4.852073182438477,success_mean 0.0, dist 1.031352907169063
episode 813, mean rew 4.853265243341188,success_mean 0.0, dist 1.0325734856420725
episode 814, mean rew 4

episode 903, mean rew 5.032390691443709,success_mean 0.0, dist 0.9494482266227919
episode 904, mean rew 5.032443294776709,success_mean 0.0, dist 0.9481986438851286
episode 905, mean rew 5.04300919675892,success_mean 0.0, dist 0.9480527963380454
episode 906, mean rew 5.045562689139503,success_mean 0.0, dist 0.9477730597991915
episode 907, mean rew 5.040348619691746,success_mean 0.0, dist 0.944238307189574
episode 908, mean rew 5.05694131757818,success_mean 0.0, dist 0.9441658512104572
episode 909, mean rew 5.05714527385138,success_mean 0.0, dist 0.9435584104382009
episode 910, mean rew 5.076096725489654,success_mean 0.0, dist 0.9398505911315954
episode 911, mean rew 5.078357882033023,success_mean 0.0, dist 0.9341905478933861
episode 912, mean rew 5.080817906526336,success_mean 0.0, dist 0.9330095652444195
episode 913, mean rew 5.076292037273664,success_mean 0.0, dist 0.9342099243595322
episode 914, mean rew 5.069842571637574,success_mean 0.0, dist 0.937302632426844
episode 915, mean rew

# HParam Sweep

In [7]:
# Sweep point on "their" 128x128 model.
# THEIRS: train_batch_size, num_sgd_iter, lr, entropy -- train_batch_size is
# set to 1000 and num_sgd_iter / lr / entropy_coeff are deliberately NOT
# overridden (the "our" values used in the other cells are 4, 1e-5 and .01).
# OURS: lambda, vf_loss_coeff, clip_param, vf_clip_param, grad_clip.
config = {
    'num_workers': 1,
    'log_level': 'ERROR',
    'framework': 'torch',
    'callbacks': CustomCallbacks,
    'train_batch_size': 1000,
    'lambda': .99,
    'vf_loss_coeff': .05,
    'clip_param': .2,
    'vf_clip_param': .2,
    'grad_clip': .5,
    'model': {
        'fcnet_hiddens': [128, 128],
    },
}
trainer = agents.ppo.PPOTrainer(env='point_mass_1', config=config)
try:
    # Each train() call is one training iteration; the log line keeps the
    # historical "episode" label so it stays comparable with earlier outputs.
    for i in range(1000):
        results = trainer.train()
        # The custom metrics may be missing if no episode has finished yet;
        # report NaN in that case instead of raising KeyError.
        custom = results['custom_metrics']
        success_mean = custom.get('success_mean', float('nan'))
        dist_mean = custom.get('dist_mean', float('nan'))
        print(f"episode {i}, mean rew {results['episode_reward_mean']}," +
              f"success_mean {success_mean}, dist {dist_mean}")
finally:
    # Release the trainer's workers even on KeyboardInterrupt or an error, so
    # re-running training cells does not leave old workers behind.
    trainer.stop()



episode 0, mean rew 0.6478838926511056,success_mean 0.0, dist 1.8423061860964733
episode 1, mean rew 0.6525990826612399,success_mean 0.0, dist 1.7955986089841456
episode 2, mean rew 0.7730340213464987,success_mean 0.0, dist 1.6929233844314144
episode 3, mean rew 1.2421696372219133,success_mean 0.0, dist 1.5653887697517308
episode 4, mean rew 1.6290689243444008,success_mean 0.0, dist 1.4657500123557197
episode 5, mean rew 2.018774702980039,success_mean 0.0, dist 1.3818087427671226
episode 6, mean rew 2.3384774500963,success_mean 0.0, dist 1.272779234576227
episode 7, mean rew 2.710140492005158,success_mean 0.0, dist 1.2164174941842698
episode 8, mean rew 2.8372736380039614,success_mean 0.0, dist 1.1978151508693038
episode 9, mean rew 3.0682613797288885,success_mean 0.0, dist 1.16522768030149
episode 10, mean rew 3.246318577762725,success_mean 0.0, dist 1.1224596482257516
episode 11, mean rew 3.43458172587284,success_mean 0.0, dist 1.0830760798608734
episode 12, mean rew 3.61197690693341

episode 100, mean rew 7.165863021732348,success_mean 0.01, dist 0.2687359904179937
episode 101, mean rew 7.160733097624867,success_mean 0.01, dist 0.2618757333898535
episode 102, mean rew 7.134932223124898,success_mean 0.01, dist 0.2667764865984354
episode 103, mean rew 7.116993852482189,success_mean 0.01, dist 0.2660972543005743
episode 104, mean rew 7.0966400705908885,success_mean 0.01, dist 0.26686452962495955
episode 105, mean rew 7.087588589703136,success_mean 0.01, dist 0.2665706485358109
episode 106, mean rew 7.0877830094918854,success_mean 0.01, dist 0.26509261809660367
episode 107, mean rew 7.0881160150754985,success_mean 0.01, dist 0.26153296416901695
episode 108, mean rew 7.08304724366386,success_mean 0.0, dist 0.26010943951807336
episode 109, mean rew 7.085325122958024,success_mean 0.0, dist 0.257916147601816
episode 110, mean rew 7.083482674754508,success_mean 0.0, dist 0.25278904662935087
episode 111, mean rew 7.074460663768149,success_mean 0.0, dist 0.25032092053873883
e

episode 200, mean rew 5.63437595998045,success_mean 0.0, dist 0.5844642283912492
episode 201, mean rew 5.640450422427819,success_mean 0.0, dist 0.583028680199395
episode 202, mean rew 5.651508224204041,success_mean 0.0, dist 0.5773453961064302
episode 203, mean rew 5.658851807073326,success_mean 0.0, dist 0.5718998035906806
episode 204, mean rew 5.65865684026945,success_mean 0.0, dist 0.5692021335441413
episode 205, mean rew 5.666936280647609,success_mean 0.0, dist 0.5628382597387528
episode 206, mean rew 5.657814789412831,success_mean 0.0, dist 0.5654358343557823
episode 207, mean rew 5.67072724210988,success_mean 0.0, dist 0.5626308480066187
episode 208, mean rew 5.681473824681754,success_mean 0.0, dist 0.5600501806717879
episode 209, mean rew 5.7050361188529255,success_mean 0.0, dist 0.5521854497571613
episode 210, mean rew 5.717080512979303,success_mean 0.0, dist 0.5467852927298389
episode 211, mean rew 5.75636394888729,success_mean 0.0, dist 0.5357957888698389
episode 212, mean re

RayTaskError(RayOutOfMemoryError): [36mray::RolloutWorker.par_iter_next()[39m (pid=26063, ip=192.168.1.61)
  File "python/ray/_raylet.pyx", line 440, in ray._raylet.execute_task
  File "/home/olivia/anaconda3/envs/meta_mb/lib/python3.7/site-packages/ray/memory_monitor.py", line 132, in raise_if_low_memory
    self.error_threshold))
ray.memory_monitor.RayOutOfMemoryError: More than 95% of the memory on node ignasi-desktop is used (29.83 / 31.26 GB). The top 10 memory consumers are:

PID	MEM	COMMAND
29346	3.83GiB	/home/olivia/anaconda3/envs/meta_mb/bin/python /opt/pycharm-community-2019.2.4/helpers/pydev/pydevd.
3194	2.55GiB	/usr/lib/x86_64-linux-gnu/hud/hud-service
29704	2.41GiB	/snap/pycharm-professional/240/jbr/bin/java -classpath /snap/pycharm-professional/240/lib/bootstrap.
27578	1.74GiB	/home/olivia/anaconda3/envs/meta_mb/bin/python -m ipykernel_launcher -f /home/olivia/.local/share/ju
25070	1.67GiB	/home/olivia/anaconda3/envs/meta_mb/bin/python -m ipykernel_launcher -f /home/olivia/.local/share/ju
25924	1.66GiB	/home/olivia/anaconda3/envs/meta_mb/bin/python -m ipykernel_launcher -f /home/olivia/.local/share/ju
26426	1.66GiB	/home/olivia/anaconda3/envs/meta_mb/bin/python -m ipykernel_launcher -f /home/olivia/.local/share/ju
4915	1.58GiB	/opt/pycharm-community-2019.2.4/jbr/bin/java -classpath /opt/pycharm-community-2019.2.4/lib/bootstra
3948	0.76GiB	/usr/lib/firefox/firefox -contentproc -childID 6 -isForBrowser -prefsLen 6965 -prefMapSize 195736 -p
3927	0.71GiB	/usr/lib/firefox/firefox -contentproc -childID 5 -isForBrowser -prefsLen 6965 -prefMapSize 195736 -p

In addition, up to 0.65 GiB of shared memory is currently being used by the Ray object store.
---
--- Tip: Use the `ray memory` command to list active objects in the cluster.
---