In [None]:
import gym
import numpy as np

# Drive the walker with one constant action for 200 steps, printing the
# per-step reward and rendering each frame.
env = gym.make("BipedalWalker-v3")
try:
    obs = env.reset()
    for _ in range(200):
        obs, reward, done, _ = env.step([1, 0, 1, 0])
        print(f"This is the reward: {reward}")
        env.render()
        if done:
            # The episode has ended; start a new one instead of stepping a
            # finished environment.
            obs = env.reset()
finally:
    # Release the environment (and its render window) even if a step raises.
    env.close()

### Calculate expected cumulative reward per episode in `BipedalWalker-v3`

In [None]:
import gym
import numpy as np

# Estimate the mean cumulative reward per episode when the same constant
# action is applied at every step, averaged over `num_episodes` episodes.
env = gym.make("BipedalWalker-v3")
num_episodes = 100
episode_returns = []  # one cumulative reward per finished episode
try:
    for ep in range(num_episodes):
        cumulative_reward_this_ep = 0
        env.reset()
        isDone = False
        while not isDone:
            _, reward, isDone, _ = env.step([1, 1, 1, 0])
            cumulative_reward_this_ep += reward
        episode_returns.append(cumulative_reward_this_ep)
finally:
    # Close the environment even if an episode raises part-way through.
    env.close()

# Build the array once instead of re-allocating it with np.append per episode.
cumulative_rewards = np.array(episode_returns, dtype=float)
expected_cumulative_rewards = cumulative_rewards.mean()
print(f"The expected cumulative reward is {expected_cumulative_rewards}")

### Teach the robot in the `BipedalWalker-v3` environment how to walk using `rllib`'s PPO implementation

In [None]:
# `ray` itself must be imported: `from ray import tune` does not bind the
# name `ray`, so `ray.init()` would raise NameError on a fresh kernel.
import ray
from ray import tune

# ignore_reinit_error lets this cell be re-run in the same kernel without
# raising because Ray is already initialised.
ray.init(ignore_reinit_error=True)

# Train PPO on BipedalWalker-v3; results and checkpoints are written under
# the local directory "BipedalWalker-v3".
# NOTE(review): no `stop` criterion is passed, so the run presumably keeps
# going until it is interrupted manually - confirm this is intended.
tune.run(
    "PPO",
    config={
        "env": "BipedalWalker-v3",
        "evaluation_interval": 100,
        "evaluation_num_episodes": 100,
    },
    local_dir="BipedalWalker-v3",
    checkpoint_freq=100,
)

In [None]:
# Visualize results using tensorboard
!tensorboard BipedalWalker_v3

### Create a video of the walking robot

In [None]:
# Restore the PPO agent from the last checkpoint file and save it in a variable called agent
from ray.rllib.agents.ppo.ppo import PPOTrainer

# Trainer configuration used to construct the agent before restoring it.
ppo_config = {
    "env": "BipedalWalker-v3",
    "evaluation_interval": 100,
    "evaluation_num_episodes": 100,
}

# Checkpoint file to restore from (path is specific to one training run).
checkpoint_path = "./BipedalWalker-v3/PPO/PPO_BipedalWalker-v3_36655_00000_0_2022-06-06_12-29-34/checkpoint_000224/checkpoint-224"

agent = PPOTrainer(config=ppo_config)
agent.restore(checkpoint_path)

In [None]:
import gym
from gym.wrappers import RecordVideo


# Roll out a single episode with the restored `agent` inside a RecordVideo
# wrapper that is given "ppo_video" as its output folder.
env = RecordVideo(gym.make("BipedalWalker-v3"), "ppo_video")
try:
    obs = env.reset()
    done = False
    while not done:
        action = agent.compute_action(obs)
        obs, r, done, _ = env.step(action)
finally:
    # Always close the wrapped environment, even if the rollout raises;
    # presumably this is also what finalises the recording - TODO confirm.
    env.close()