In [None]:
import os
from pyvirtualdisplay import Display

# Start a non-visible (visible=0) 1400x900 virtual display for this kernel.
# NOTE(review): presumably needed so rendering works on a headless machine
# (e.g. a remote server or Colab) — confirm it is still required for
# render_mode="rgb_array".
virtual_display = Display(visible=0, size=(1400, 900))
virtual_display.start()

In [None]:

# RL starts here

import gymnasium
from gymnasium.wrappers import RecordVideo

from stable_baselines3 import PPO # Proximal Policy Optimization
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

from pathlib import Path

In [None]:
# Create a LunarLander-v3 environment and bind it to `env`.
# NOTE: the training cell below rebinds `env` to a fresh environment before
# training, so this instance only matters if a later cell that reads `env`
# is run without running the training cell first.
env = gymnasium.make("LunarLander-v3")

In [None]:
# Train a PPO agent on LunarLander-v3, evaluate it, and save it to disk.

# Tunable knobs for this cell.
TOTAL_TIMESTEPS = 1_000_000  # training budget, in environment steps
N_EVAL_EPISODES = 10         # episodes averaged for the evaluation score

# Create a fresh environment for training (must be new each time you run this cell)
env = gymnasium.make("LunarLander-v3")

# Create the policy model.
model = PPO(
    policy="MlpPolicy",
    env=env,
    n_steps=1024,      # rollout steps collected before each policy update
    batch_size=64,     # minibatch size used within each update
    n_epochs=4,        # optimisation passes over each rollout
    gamma=0.999,       # discount factor
    gae_lambda=0.98,   # GAE bias/variance trade-off
    ent_coef=0.01,     # entropy bonus coefficient
    verbose=1,
    device='cpu'
)

# Train the PPO model.
model.learn(total_timesteps=TOTAL_TIMESTEPS)

# Eval (higher reward is better). A separate Monitor-wrapped environment is
# used so evaluation does not touch the training environment; it is closed
# afterwards so its resources are released even if evaluation fails.
eval_env = Monitor(gymnasium.make("LunarLander-v3"))
try:
    mean_reward, std_reward = evaluate_policy(
        model,
        eval_env,
        n_eval_episodes=N_EVAL_EPISODES,
        deterministic=True,
    )
finally:
    eval_env.close()
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")


# Persist the trained model (written relative to the current working directory).
model_name = "ppo-LunarLander-v3"
model.save(model_name)

In [None]:
# Record LunarLander-v3 episodes to video files under `video_folder`.

N_VIDEO_EPISODES = 1  # number of episodes to record

# Create a fresh environment for video recording
video_folder = Path("./videos/lunar-lander")
# parents=True: the parent "./videos" directory does not exist on a fresh run,
# and mkdir would otherwise raise FileNotFoundError.
video_folder.mkdir(parents=True, exist_ok=True)

record_env = gymnasium.make("LunarLander-v3", render_mode="rgb_array")
# episode_trigger always returns True, so every episode is recorded.
video_env = RecordVideo(record_env, video_folder=str(video_folder), episode_trigger=lambda x: True)

try:
    for _ in range(N_VIDEO_EPISODES):
        # Reset at the start of each episode (not after it), so no extra
        # episode is started — and recorded — once the last one has finished.
        obs, info = video_env.reset()
        done = False
        while not done:
            # Get observation of environment, tell model to take an action.
            # action, _ = model.predict(obs, deterministic=True)

            # random_action to show how bad things can be.
            # Sample from the environment actually being stepped, so this cell
            # does not depend on `env` from another cell.
            action = video_env.action_space.sample()

            obs, reward, terminated, truncated, info = video_env.step(action)
            # The episode ends when the environment reports a terminal state
            # or when it is truncated (cut short).
            done = terminated or truncated
finally:
    # Always close the wrapper so any in-progress recording is finalised,
    # even if the loop above is interrupted.
    video_env.close()

print(f"Videos saved to {video_folder}")