In [None]:
!pip install ray[rllib] tensorflow
!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116

In [7]:
import argparse
import gymnasium as gym
import os

import numpy as np
import ray
from ray.air import Checkpoint
from ray.air.config import RunConfig
from ray.train.rl.rl_predictor import RLPredictor
from ray.train.rl.rl_trainer import RLTrainer
from ray.air.config import ScalingConfig
from ray.air.result import Result
from ray.rllib.algorithms.bc import BC
from ray.tune.tuner import Tuner

In [8]:
def train_rl_ppo_online(num_workers: int, use_gpu: bool = False) -> Result:
    print("Starting online training")
    trainer = RLTrainer(
        run_config=RunConfig(stop={"training_iteration": 5}),
        scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu),
        algorithm="PPO",
        config={
            "env": "CartPole-v1",
            "framework": "tf",
        },
    )
    # Todo (krfricke/xwjiang): Enable checkpoint config in RunConfig
    # result = trainer.fit()
    tuner = Tuner(
        trainer,
        _tuner_kwargs={"checkpoint_at_end": True},
    )
    result = tuner.fit()[0]
    return result

In [9]:
def evaluate_using_checkpoint(checkpoint: Checkpoint, num_episodes) -> list:
    predictor = RLPredictor.from_checkpoint(checkpoint)

    env = gym.make("CartPole-v1")

    rewards = []
    for i in range(num_episodes):
        obs = env.reset()
        reward = 0.0
        done = False
        while not done:
            action = predictor.predict(np.array([obs]))
            obs, r, done, _ = env.step(action[0])
            reward += r
        rewards.append(reward)

    return rewards

In [12]:
result = train_rl_ppo_online(num_workers=2, use_gpu=False)

Starting online training


2023-02-21 23:55:44,189	INFO worker.py:1538 -- Started a local Ray instance.


0,1
Current time:,2023-02-21 23:58:41
Running for:,00:02:48.40
Memory:,4.1/15.5 GiB

Trial name,# failures,error file
AIRPPO_e1b56_00000,1,/home/nishant/ray_results/AIRPPO_2023-02-21_23-55-22/AIRPPO_e1b56_00000_0_2023-02-21_23-55-52/error.txt

Trial name,status,loc
AIRPPO_e1b56_00000,ERROR,


2023-02-21 23:55:52,615	INFO algorithm_config.py:2503 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
2023-02-21 23:55:52,618	INFO algorithm_config.py:2503 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(pid=846107)[0m   np.bool8: (False, True),
[2m[36m(pid=846107)[0m Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
[2m[36m(pid=846107)[0m   declare_namespace(pkg)
[2m[36m(pid=846107)[0m Implementing implicit

Trial name,trial_id
AIRPPO_e1b56_00000,e1b56_00000


2023-02-21 23:58:41,020	ERROR ray_trial_executor.py:118 -- An exception occurred when trying to stop the Ray actor:Traceback (most recent call last):
  File "/run/media/nishant/Data/Work/uni/thesis/repo/env/lib/python3.9/site-packages/ray/tune/execution/ray_trial_executor.py", line 109, in _post_stop_cleanup
    ray.get(future, timeout=timeout)
  File "/run/media/nishant/Data/Work/uni/thesis/repo/env/lib/python3.9/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "/run/media/nishant/Data/Work/uni/thesis/repo/env/lib/python3.9/site-packages/ray/_private/worker.py", line 2311, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, [36mray::AIRRLTrainer.__init__()[39m (pid=846107, ip=192.168.10.111, repr=AIRPPO)
  File "/run/media/nishant/Data/Work/uni/thesis/repo/env/lib/python3.9/site-packages/ray/train/rl/rl_trainer.py", line 209, in __init__
    super(AIRRLTraine

In [15]:
result

Result(metrics={'trial_id': 'e1b56_00000'}, error=TuneError('Failure # 1 (occurred at 2023-02-21_23-58-40)\nTraceback (most recent call last):\n  File "/run/media/nishant/Data/Work/uni/thesis/repo/env/lib/python3.9/site-packages/ray/tune/execution/ray_trial_executor.py", line 1070, in get_next_executor_event\n    future_result = ray.get(ready_future)\n  File "/run/media/nishant/Data/Work/uni/thesis/repo/env/lib/python3.9/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper\n    return func(*args, **kwargs)\n  File "/run/media/nishant/Data/Work/uni/thesis/repo/env/lib/python3.9/site-packages/ray/_private/worker.py", line 2311, in get\n    raise value\nray.exceptions.RayActorError: The actor died because of an error raised in its creation task, \x1b[36mray::AIRRLTrainer.__init__()\x1b[39m (pid=846107, ip=192.168.10.111, repr=AIRPPO)\n  File "/run/media/nishant/Data/Work/uni/thesis/repo/env/lib/python3.9/site-packages/ray/train/rl/rl_trainer.py", line 209, in __init__\n  

In [13]:
num_eval_episodes = 3

rewards = evaluate_using_checkpoint(result.checkpoint, num_episodes=num_eval_episodes)
print(f"Average reward over {num_eval_episodes} episodes: " f"{np.mean(rewards)}")

AttributeError: 'NoneType' object has no attribute '_local_path'