In [2]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np

In [3]:
import ray
from ray.rllib import agents
ray.init(log_to_driver=False) # Skip or set to ignore if already called

from envs.point_mass_env import PointMassEnv 
from ray.tune.logger import pretty_print

Instructions for updating:
non-resource variables are not supported in the long term


2021-04-09 22:51:54,466	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8268[39m[22m
No module named 'flow'
No module named 'carla'


In [4]:
from ray.tune.registry import register_env

def env_creator(env_config):
    return PointMassEnv('maze2d-open-dense-v0')

register_env("point_mass_1", env_creator)

In [5]:
from ray.rllib.agents.callbacks import DefaultCallbacks

class CustomCallbacks(DefaultCallbacks):
    def on_episode_end(self, worker, base_env,
                       policies, episode,
                       **kwargs):
        success = int(episode.last_info_for()['success'])
        last_obs = episode.last_observation_for()
        pos = last_obs[:2]
        target = last_obs[4:6]
        dist = np.linalg.norm(pos - target)
        episode.custom_metrics['dist'] = dist
        episode.custom_metrics["success"] = success

## Their Model

In [5]:
config = {
          'num_workers': 1,
          'train_batch_size': 1000,
          'log_level': 'ERROR',
          'framework': 'torch',
          'callbacks': CustomCallbacks,
          'model': {
              'fcnet_hiddens': [128, 128],
          }}
trainer = agents.ppo.PPOTrainer(env='point_mass_1', config=config)
for i in range(1000):
    results = trainer.train()
    print(f"episode {i}, mean rew {results['episode_reward_mean']}," +
          f"success_mean {results['custom_metrics']['success_mean']}, dist {results['custom_metrics']['dist_mean']}")

RaySystemError: System error: Ray has not been started yet. You can start Ray with 'ray.init()'.

# Our Model

In [10]:
config = {
          'num_workers': 1,
          'train_batch_size': 200,
          'log_level': 'ERROR',
          'framework': 'torch',
          'callbacks': CustomCallbacks,
              'lambda': .99,
              'num_sgd_iter': 4,
              'lr': 1e-5,
              'vf_loss_coeff': .05,
              'entropy_coeff': .01,
              'clip_param': .2,
              'vf_clip_param': .2,
              'grad_clip': .5,
          'model': {
              'fcnet_hiddens': [64, 64],
          }}
trainer = agents.ppo.PPOTrainer(env='point_mass_1', config=config)
for i in range(1000):
    results = trainer.train()
    print(f"episode {i}, mean rew {results['episode_reward_mean']}," +
          f"success_mean {results['custom_metrics']['success_mean']}, dist {results['custom_metrics']['dist_mean']}")



episode 0, mean rew 1.0692866670458385,success_mean 0.0, dist 1.9715892846976069
episode 1, mean rew 0.8894788660171702,success_mean 0.0, dist 1.7873492818147683
episode 2, mean rew 0.7707202149347199,success_mean 0.0, dist 1.747219061249293
episode 3, mean rew 1.4870071118250645,success_mean 0.0, dist 1.6146722070907749
episode 4, mean rew 1.4751989947015547,success_mean 0.0, dist 1.6230163641327033
episode 5, mean rew 1.3336213128305683,success_mean 0.0, dist 1.5965751981046032
episode 6, mean rew 1.216758760241136,success_mean 0.0, dist 1.6749183910915009
episode 7, mean rew 1.1032890394815946,success_mean 0.0, dist 1.6637052698397556
episode 8, mean rew 1.0496191374475825,success_mean 0.0, dist 1.6709231376646045
episode 9, mean rew 1.100692563938615,success_mean 0.0, dist 1.6377839572556991
episode 10, mean rew 1.0230912662509597,success_mean 0.0, dist 1.6849154009430667
episode 11, mean rew 1.0660560557408691,success_mean 0.0, dist 1.6120337411991557
episode 12, mean rew 1.002128

episode 101, mean rew 1.2461697698656606,success_mean 0.0, dist 1.5057621882110346
episode 102, mean rew 1.2452612304584156,success_mean 0.0, dist 1.515415764849753
episode 103, mean rew 1.246289680431168,success_mean 0.0, dist 1.5108534689326043
episode 104, mean rew 1.2626394432501244,success_mean 0.0, dist 1.508515509044965
episode 105, mean rew 1.260262873966135,success_mean 0.0, dist 1.5078802411763073
episode 106, mean rew 1.2559027805817002,success_mean 0.0, dist 1.514632267456452
episode 107, mean rew 1.3047084425947046,success_mean 0.0, dist 1.51176339255784
episode 108, mean rew 1.314726969193822,success_mean 0.0, dist 1.5219233772410072
episode 109, mean rew 1.3116824226067294,success_mean 0.0, dist 1.5235500902645105
episode 110, mean rew 1.3028018296585038,success_mean 0.0, dist 1.5265747097833091
episode 111, mean rew 1.3131203781149978,success_mean 0.0, dist 1.518290222611409
episode 112, mean rew 1.3235313418206311,success_mean 0.0, dist 1.52616385670905
episode 113, me

episode 201, mean rew 1.6848208974493502,success_mean 0.0, dist 1.8203422107948564
episode 202, mean rew 1.6861296814105555,success_mean 0.0, dist 1.7989577358904179
episode 203, mean rew 1.6913557042115572,success_mean 0.0, dist 1.8080467853670985
episode 204, mean rew 1.6949922621009728,success_mean 0.0, dist 1.8252827901678563
episode 205, mean rew 1.6951911623208402,success_mean 0.0, dist 1.823357407097734
episode 206, mean rew 1.6793156383480083,success_mean 0.0, dist 1.8341055962938657
episode 207, mean rew 1.6753047729473738,success_mean 0.0, dist 1.836161191289773
episode 208, mean rew 1.6816033598580105,success_mean 0.0, dist 1.8523076478646785
episode 209, mean rew 1.6776109116419764,success_mean 0.0, dist 1.8771294519802206
episode 210, mean rew 1.670334391604258,success_mean 0.0, dist 1.8926127622147215
episode 211, mean rew 1.6751322143922434,success_mean 0.0, dist 1.9067347168883475
episode 212, mean rew 1.6827198039446147,success_mean 0.0, dist 1.9224638312448767
episode

episode 301, mean rew 1.735326896350896,success_mean 0.0, dist 2.2616406493463925
episode 302, mean rew 1.71279831691212,success_mean 0.0, dist 2.282541466787379
episode 303, mean rew 1.7203734005862206,success_mean 0.0, dist 2.29270567235481
episode 304, mean rew 1.7339406481722248,success_mean 0.0, dist 2.291837101359293
episode 305, mean rew 1.7370140549687418,success_mean 0.0, dist 2.304319831918282
episode 306, mean rew 1.7408194566251423,success_mean 0.0, dist 2.304994964568728
episode 307, mean rew 1.7422918560962275,success_mean 0.0, dist 2.3038252981972924
episode 308, mean rew 1.7256818326021184,success_mean 0.0, dist 2.3111561813576205
episode 309, mean rew 1.7273682818721243,success_mean 0.0, dist 2.3221491359412756
episode 310, mean rew 1.7405505454856458,success_mean 0.0, dist 2.3224438688791564
episode 311, mean rew 1.7522566680353635,success_mean 0.0, dist 2.3152238202545505
episode 312, mean rew 1.7591288029019017,success_mean 0.0, dist 2.3148139435892237
episode 313, 

KeyboardInterrupt: 

# Our Model, their Hparams

In [None]:
config = {
          'num_workers': 1,
          'log_level': 'ERROR',
          'framework': 'torch',
          'callbacks': CustomCallbacks,
             'train_batch_size': 1000,
#               'lambda': .99,
#               'num_sgd_iter': 4,
#               'lr': 1e-5,
#               'vf_loss_coeff': .05,
#               'entropy_coeff': .01,
#               'clip_param': .2,
#               'vf_clip_param': .2,
#               'grad_clip': .5,
          'model': {
              'fcnet_hiddens': [64, 64],
          }}
trainer = agents.ppo.PPOTrainer(env='point_mass_1', config=config)
for i in range(1000):
    results = trainer.train()
    print(f"episode {i}, mean rew {results['episode_reward_mean']}," +
          f"success_mean {results['custom_metrics']['success_mean']}, dist {results['custom_metrics']['dist_mean']}")



episode 0, mean rew 0.6130870731157936,success_mean 0.0, dist 1.5921449389108202
episode 1, mean rew 1.2603985894650935,success_mean 0.0, dist 1.3978968519769315
episode 2, mean rew 1.6397175687765189,success_mean 0.0, dist 1.2829645914351056
episode 3, mean rew 1.8413079892639617,success_mean 0.0, dist 1.270840889907793
episode 4, mean rew 2.156830142168495,success_mean 0.0, dist 1.2729382458015874
episode 5, mean rew 2.3076811655311666,success_mean 0.0, dist 1.4099414971591138
episode 6, mean rew 2.4904063874259212,success_mean 0.0, dist 1.462403880433009
episode 7, mean rew 2.6058275301371445,success_mean 0.0, dist 1.5258415119505744
episode 8, mean rew 2.7845356533821137,success_mean 0.0, dist 1.5302406507837254
episode 9, mean rew 2.9226877111560543,success_mean 0.0, dist 1.509508168439038
episode 10, mean rew 3.0721563019944016,success_mean 0.0, dist 1.5143305709169286
episode 11, mean rew 3.2288280920816184,success_mean 0.0, dist 1.5142189454024064
episode 12, mean rew 3.3611624

# Their Model, our HParams

In [None]:
config = {
          'num_workers': 1,
          'train_batch_size': 200,
          'log_level': 'ERROR',
          'framework': 'torch',
          'callbacks': CustomCallbacks,
              'lambda': .99,
              'num_sgd_iter': 4,
              'lr': 1e-5,
              'vf_loss_coeff': .05,
              'entropy_coeff': .01,
              'clip_param': .2,
              'vf_clip_param': .2,
              'grad_clip': .5,
          'model': {
              'fcnet_hiddens': [128, 128],
          }}
trainer = agents.ppo.PPOTrainer(env='point_mass_1', config=config)
for i in range(1000):
    results = trainer.train()
    print(f"episode {i}, mean rew {results['episode_reward_mean']}," +
          f"success_mean {results['custom_metrics']['success_mean']}, dist {results['custom_metrics']['dist_mean']}")

2021-04-09 22:43:54,709	INFO trainer.py:643 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


episode 0, mean rew -0.0042672618553116475,success_mean 0.0, dist 2.1171825753317055
episode 1, mean rew 1.5317670709560858,success_mean 0.0, dist 2.1168891402773533
episode 2, mean rew 0.9739439059429491,success_mean 0.0, dist 1.9867344200294526
episode 3, mean rew 0.8237294325591751,success_mean 0.0, dist 1.8542234287824317
episode 4, mean rew 0.7392553537863841,success_mean 0.0, dist 1.9621321681932928
episode 5, mean rew 0.6879680612319949,success_mean 0.0, dist 1.8686331072202176
episode 6, mean rew 0.6471540448907273,success_mean 0.0, dist 1.8828189734036525
episode 7, mean rew 0.6223198834984571,success_mean 0.0, dist 1.9148429112651197
episode 8, mean rew 0.5609702963733796,success_mean 0.0, dist 1.9498221809292697
episode 9, mean rew 0.5457376445191924,success_mean 0.0, dist 1.9496464271918301
episode 10, mean rew 0.5276753572721965,success_mean 0.0, dist 1.9644001709102592
episode 11, mean rew 0.6260047230030841,success_mean 0.0, dist 1.8619265667816627
episode 12, mean rew 0

episode 101, mean rew 2.03805883288095,success_mean 0.0, dist 1.4326718034122674
episode 102, mean rew 2.0465902553308677,success_mean 0.0, dist 1.4425701936604058
episode 103, mean rew 2.0390998270790375,success_mean 0.0, dist 1.4463530936097984
episode 104, mean rew 2.0415901550173414,success_mean 0.0, dist 1.466597917731608
episode 105, mean rew 2.0687726722647612,success_mean 0.0, dist 1.4734228487301126
episode 106, mean rew 2.0939962515285613,success_mean 0.0, dist 1.4781209199826921
episode 107, mean rew 2.1139095339072127,success_mean 0.0, dist 1.4641704002921372
episode 108, mean rew 2.1132250754643063,success_mean 0.0, dist 1.4844154311268545
episode 109, mean rew 2.1280303235399796,success_mean 0.0, dist 1.4833678036011246
episode 110, mean rew 2.1210107444448103,success_mean 0.0, dist 1.4859147530418424
episode 111, mean rew 2.143188494957774,success_mean 0.0, dist 1.4787769118658556
episode 112, mean rew 2.149481793009953,success_mean 0.0, dist 1.504376194940434
episode 11

episode 201, mean rew 2.490302331781076,success_mean 0.0, dist 1.9498509315596035
episode 202, mean rew 2.4753532168725108,success_mean 0.0, dist 1.9526074051103932
episode 203, mean rew 2.4514987054803434,success_mean 0.0, dist 1.960125408662647
episode 204, mean rew 2.4477538549012148,success_mean 0.0, dist 1.9501082140516985
episode 205, mean rew 2.4423686042581134,success_mean 0.0, dist 1.9534608394269517
episode 206, mean rew 2.450574392766683,success_mean 0.0, dist 1.9544467430680155
episode 207, mean rew 2.4441594583480417,success_mean 0.0, dist 1.9508545287474564
episode 208, mean rew 2.436896290344578,success_mean 0.0, dist 1.9570077439326594
episode 209, mean rew 2.44213071049116,success_mean 0.0, dist 1.9458371851421492
episode 210, mean rew 2.442929650163508,success_mean 0.0, dist 1.9600426463706293
episode 211, mean rew 2.457484048587423,success_mean 0.0, dist 1.966885338546061
episode 212, mean rew 2.4748893065344655,success_mean 0.0, dist 1.9823214918416707
episode 213, 

episode 301, mean rew 2.8662081246261963,success_mean 0.0, dist 1.7041437631562235
episode 302, mean rew 2.9019564482797375,success_mean 0.0, dist 1.6937425664635253
episode 303, mean rew 2.9200072506673966,success_mean 0.0, dist 1.700389640105303
episode 304, mean rew 2.9520961737816656,success_mean 0.0, dist 1.6960740936425844
episode 305, mean rew 2.9842967600367065,success_mean 0.0, dist 1.6818117467068492
episode 306, mean rew 2.9914767575236403,success_mean 0.0, dist 1.7074555101029205
episode 307, mean rew 2.990142518893389,success_mean 0.0, dist 1.7079685427185303
episode 308, mean rew 2.9770000290875975,success_mean 0.0, dist 1.718628421839635
episode 309, mean rew 2.9893221988939267,success_mean 0.0, dist 1.7214112675452728
episode 310, mean rew 2.990709655277357,success_mean 0.0, dist 1.727488689157064
episode 311, mean rew 2.9889947641838277,success_mean 0.0, dist 1.7479412824370661
episode 312, mean rew 3.0081645456508768,success_mean 0.0, dist 1.7374858838134788
episode 3

episode 401, mean rew 3.3412829858694817,success_mean 0.0, dist 1.4311042505168725
episode 402, mean rew 3.340401623312373,success_mean 0.0, dist 1.4221485159440383
episode 403, mean rew 3.3553360937869843,success_mean 0.0, dist 1.4210553885855077
episode 404, mean rew 3.3862658237303584,success_mean 0.0, dist 1.4094546670428405
episode 405, mean rew 3.41266556411176,success_mean 0.0, dist 1.396818818208349
episode 406, mean rew 3.41083037911155,success_mean 0.0, dist 1.4027833936209761
episode 407, mean rew 3.395177023093952,success_mean 0.0, dist 1.412915058578149
episode 408, mean rew 3.407619511617723,success_mean 0.0, dist 1.4144976942516694
episode 409, mean rew 3.4074140754539606,success_mean 0.0, dist 1.4127292345034668
episode 410, mean rew 3.434559125366109,success_mean 0.0, dist 1.4055493298619235
episode 411, mean rew 3.4189292636134736,success_mean 0.0, dist 1.417013804167158
episode 412, mean rew 3.432837452209805,success_mean 0.0, dist 1.424998368302492
episode 413, mean

# HParam Sweep

In [8]:
config = { # THEIRS: train_batch_size, num_sgd_iter, lr, entropy
          'num_workers': 1,
          'log_level': 'ERROR',
          'framework': 'torch',
          'callbacks': CustomCallbacks,
          'train_batch_size': 1000,
              'lambda': .99,
#               'num_sgd_iter': 4,
#               'lr': 1e-5,
              'vf_loss_coeff': .05,
#               'entropy_coeff': .01,
#               'clip_param': .2,
#               'vf_clip_param': .2,
              'grad_clip': .5,
          'model': {
              'fcnet_hiddens': [128, 128],
          }}
trainer = agents.ppo.PPOTrainer(env='point_mass_1', config=config)
for i in range(1000):
    results = trainer.train()
    print(f"episode {i}, mean rew {results['episode_reward_mean']}," +
          f"success_mean {results['custom_metrics']['success_mean']}, dist {results['custom_metrics']['dist_mean']}")



episode 0, mean rew 0.6936445512157147,success_mean 0.0, dist 1.5130763623598125
episode 1, mean rew 1.180317509282308,success_mean 0.0, dist 1.3928303533079698
episode 2, mean rew 1.5209233748379507,success_mean 0.0, dist 1.4111612057029457
episode 3, mean rew 2.0358760571766514,success_mean 0.0, dist 1.4332206199709197
episode 4, mean rew 2.409555216153366,success_mean 0.0, dist 1.407999173847923
episode 5, mean rew 2.713123403655735,success_mean 0.0, dist 1.4516784846771074
episode 6, mean rew 2.9830066569648244,success_mean 0.0, dist 1.4131335758919696
episode 7, mean rew 3.271533921816679,success_mean 0.0, dist 1.3680228569363557
episode 8, mean rew 3.4457980651898565,success_mean 0.0, dist 1.3529607477770382
episode 9, mean rew 3.575116924405371,success_mean 0.0, dist 1.3430542497183464
episode 10, mean rew 3.717299983807245,success_mean 0.0, dist 1.3322161155898171
episode 11, mean rew 3.834955948554685,success_mean 0.0, dist 1.321343120642565
episode 12, mean rew 3.935236997627

episode 101, mean rew 6.301915209821405,success_mean 0.01, dist 0.3208584799708498
episode 102, mean rew 6.321233479409588,success_mean 0.01, dist 0.3139016550779648
episode 103, mean rew 6.379942715057857,success_mean 0.04, dist 0.3006784223139882
episode 104, mean rew 6.434317165654787,success_mean 0.06, dist 0.2855488231172446
episode 105, mean rew 6.45468644485481,success_mean 0.06, dist 0.2791311509505089
episode 106, mean rew 6.45942274431593,success_mean 0.06, dist 0.2772138395620911
episode 107, mean rew 6.461741179758622,success_mean 0.06, dist 0.2748486412990433
episode 108, mean rew 6.460803853043865,success_mean 0.06, dist 0.27000392181987914
episode 109, mean rew 6.476869853697382,success_mean 0.06, dist 0.26433501895308603
episode 110, mean rew 6.49896129800594,success_mean 0.06, dist 0.25982152438935324
episode 111, mean rew 6.494985100541798,success_mean 0.06, dist 0.25508812174388673
episode 112, mean rew 6.50308814859464,success_mean 0.06, dist 0.24300830930853454
epi

episode 201, mean rew 5.14231194400443,success_mean 0.0, dist 0.5978376465885791
episode 202, mean rew 5.193747479354833,success_mean 0.0, dist 0.5797299559339837
episode 203, mean rew 5.234138019992291,success_mean 0.0, dist 0.5646270565175183
episode 204, mean rew 5.252884777237647,success_mean 0.0, dist 0.5598246788055072
episode 205, mean rew 5.270521539730438,success_mean 0.0, dist 0.5569084426562465
episode 206, mean rew 5.308454947815162,success_mean 0.0, dist 0.5442762354083686
episode 207, mean rew 5.331642315494328,success_mean 0.0, dist 0.5363362875850346
episode 208, mean rew 5.363510676495877,success_mean 0.0, dist 0.5260679427569931
episode 209, mean rew 5.380410282321063,success_mean 0.0, dist 0.5184775708428134
episode 210, mean rew 5.388417227465993,success_mean 0.0, dist 0.5118774879955521
episode 211, mean rew 5.400999379953938,success_mean 0.0, dist 0.5065265469194664
episode 212, mean rew 5.39843181773162,success_mean 0.0, dist 0.5048478711550721
episode 213, mean 

RayTaskError(RayOutOfMemoryError): [36mray::RolloutWorker.par_iter_next()[39m (pid=26788, ip=192.168.1.61)
  File "python/ray/_raylet.pyx", line 440, in ray._raylet.execute_task
  File "/home/olivia/anaconda3/envs/meta_mb/lib/python3.7/site-packages/ray/memory_monitor.py", line 132, in raise_if_low_memory
    self.error_threshold))
ray.memory_monitor.RayOutOfMemoryError: More than 95% of the memory on node ignasi-desktop is used (30.16 / 31.26 GB). The top 10 memory consumers are:

PID	MEM	COMMAND
29346	4.1GiB	/home/olivia/anaconda3/envs/meta_mb/bin/python /opt/pycharm-community-2019.2.4/helpers/pydev/pydevd.
3194	2.55GiB	/usr/lib/x86_64-linux-gnu/hud/hud-service
29704	2.41GiB	/snap/pycharm-professional/240/jbr/bin/java -classpath /snap/pycharm-professional/240/lib/bootstrap.
27578	1.74GiB	/home/olivia/anaconda3/envs/meta_mb/bin/python -m ipykernel_launcher -f /home/olivia/.local/share/ju
25070	1.67GiB	/home/olivia/anaconda3/envs/meta_mb/bin/python -m ipykernel_launcher -f /home/olivia/.local/share/ju
25924	1.66GiB	/home/olivia/anaconda3/envs/meta_mb/bin/python -m ipykernel_launcher -f /home/olivia/.local/share/ju
26426	1.66GiB	/home/olivia/anaconda3/envs/meta_mb/bin/python -m ipykernel_launcher -f /home/olivia/.local/share/ju
4915	1.58GiB	/opt/pycharm-community-2019.2.4/jbr/bin/java -classpath /opt/pycharm-community-2019.2.4/lib/bootstra
3948	0.73GiB	/usr/lib/firefox/firefox -contentproc -childID 6 -isForBrowser -prefsLen 6965 -prefMapSize 195736 -p
3927	0.69GiB	/usr/lib/firefox/firefox -contentproc -childID 5 -isForBrowser -prefsLen 6965 -prefMapSize 195736 -p

In addition, up to 0.65 GiB of shared memory is currently being used by the Ray object store.
---
--- Tip: Use the `ray memory` command to list active objects in the cluster.
---