In [5]:
import gymnasium as gym
import numpy as np
from stable_baselines3 import TD3
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder
import os
import pygame

# Reproducibility: a single seed value, applied here to NumPy's global RNG.
SEED = 42
np.random.seed(SEED)

# Output locations for logs and recorded videos; both directories are created
# up front (existing directories are left untouched).
log_dir = "./logs/"
video_dir = "./logs/videos/"
for _out_dir in (log_dir, video_dir):
    os.makedirs(_out_dir, exist_ok=True)

# create : LunarLander (continuous control) environment
def make_env():
    """Build one monitored LunarLanderContinuous-v3 environment.

    The environment renders to RGB arrays and is wrapped in a ``Monitor``
    that is given ``<log_dir>/monitor.csv`` as its log file name.

    Returns:
        The ``Monitor``-wrapped environment.
    """
    monitor_path = os.path.join(log_dir, "monitor.csv")
    lander = gym.make("LunarLanderContinuous-v3", render_mode="rgb_array")
    return Monitor(lander, filename=monitor_path)

def _record_training_video(step):
    """Return True when ``step`` is a multiple of 30000 (including 0)."""
    return step % 30000 == 0


# Single-environment vectorised wrapper around make_env(), itself wrapped in a
# video recorder that writes clips of up to 10000 steps into `video_dir`
# whenever the trigger above fires.
train_env = VecVideoRecorder(
    DummyVecEnv([make_env]),
    video_folder=video_dir,
    record_video_trigger=_record_training_video,
    video_length=10000,
    name_prefix="train_lander",
)

# Size of the action vector, read from the (vectorised) training environment.
n_actions = train_env.action_space.shape[0]

# Exploration noise handed to the agent: zero-mean Gaussian with a standard
# deviation of 0.1 on every action dimension.
action_noise = NormalActionNoise(
    mean=np.zeros(n_actions),
    sigma=np.full(n_actions, 0.1),
)

# Hyperparameters for the TD3 agent, gathered in one place and forwarded
# unchanged to the constructor below.
td3_hyperparams = dict(
    action_noise=action_noise,  # exploration noise defined above
    verbose=1,
    seed=SEED,
    learning_rate=0.001,
    buffer_size=100000,
    batch_size=100,
    learning_starts=1000,
    policy_delay=2,
    target_policy_noise=0.2,
    target_noise_clip=0.5,
    tau=0.005,
    gamma=0.99,
)

# Create a TD3 agent with an MLP (Multi Layer Perceptron) policy that acts in
# the recorded training environment.
model = TD3("MlpPolicy", train_env, **td3_hyperparams)

# Train for 100000 timesteps and save the agent as "td3_lander".
# The environment is closed in `finally` so that it is released even when
# training or saving raises or is interrupted (e.g. the notebook cell is
# stopped), instead of leaking the environment and its video recorder.
try:
    model.learn(total_timesteps=100000)
    model.save("td3_lander")
finally:
    train_env.close()

# create : test environment
def _make_test_env():
    """Build a monitored LunarLander env that logs to its own Monitor file.

    Reusing make_env() here would reopen <log_dir>/monitor.csv; Monitor
    overwrites an existing file by default, which would discard the episode
    statistics written during training. A separate file keeps both logs.
    """
    env = gym.make("LunarLanderContinuous-v3", render_mode="rgb_array")
    return Monitor(env, filename=os.path.join(log_dir, "test_monitor.csv"))


test_env = VecVideoRecorder(
    DummyVecEnv([_make_test_env]),
    video_folder=video_dir,
    record_video_trigger=lambda step: step == 0,
    video_length=600,
    name_prefix="test_lander",
)

# evaluate : agent
# Step cap for the evaluation episode. It matches the 1000-step cap of the GUI
# rollout; the previous cap of 200 could stop before the episode finished and
# report a partial return.
MAX_EVAL_STEPS = 1000

try:
    obs = test_env.reset()
    total_reward = 0.0

    for _ in range(MAX_EVAL_STEPS):
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, info = test_env.step(action)
        # The vectorised env returns one entry per sub-environment; there is
        # exactly one, so index 0 is the episode being evaluated.
        total_reward += float(reward[0])
        if done[0]:
            break

    print(f"Evaluation reward: {total_reward:.2f}")
finally:
    # Always close, so the environment and recorder are released even if
    # prediction or stepping fails.
    test_env.close()

# GUI
# Replay one episode of the trained agent in a window (render_mode="human").
gui_env = gym.make("LunarLanderContinuous-v3", render_mode="human")
try:
    obs, _ = gui_env.reset(seed=SEED)
    total_reward = 0

    # At most 1000 steps; stop early once the episode terminates or truncates.
    for _ in range(1000):
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, _ = gui_env.step(action)
        total_reward += reward
        if terminated or truncated:
            break

    print(f"GUI Evaluation reward: {total_reward:.2f}")
finally:
    # Tear the window down even when the rollout fails, so an error does not
    # leave an orphaned render window behind.
    gui_env.close()
    pygame.quit()

Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 121      |
|    ep_rew_mean     | -222     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 1005     |
|    time_elapsed    | 0        |
|    total_timesteps | 483      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 112      |
|    ep_rew_mean     | -179     |
| time/              |          |
|    episodes        | 8        |
|    fps             | 1033     |
|    time_elapsed    | 0        |
|    total_timesteps | 899      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 157      |
|    ep_rew_mean     | -294     |
| time/              |          |
|    episodes        | 12       |
|    fps             | 491      |
|    time_elapsed    | 3        |
|    total_timesteps | 1883    

                                                                             

MoviePy - Done !
MoviePy - video ready /Users/mehulxyz/dev/lunarLander/lander_td3/logs/videos/train_lander-step-0-to-step-10000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 353      |
|    ep_rew_mean     | -243     |
| time/              |          |
|    episodes        | 36       |
|    fps             | 246      |
|    time_elapsed    | 51       |
|    total_timesteps | 12725    |
| train/             |          |
|    actor_loss      | 4.93     |
|    critic_loss     | 4.78     |
|    learning_rate   | 0.001    |
|    n_updates       | 11724    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 409      |
|    ep_rew_mean     | -230     |
| time/              |          |
|    episodes        | 40       |
|    fps             | 240      |
|    time_elapsed    | 67       |
|    total_timesteps | 16352    |
| train/             |          |
|    actor_loss   

                                                                             

MoviePy - Done !
MoviePy - video ready /Users/mehulxyz/dev/lunarLander/lander_td3/logs/videos/train_lander-step-30000-to-step-40000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 503      |
|    ep_rew_mean     | -147     |
| time/              |          |
|    episodes        | 84       |
|    fps             | 233      |
|    time_elapsed    | 180      |
|    total_timesteps | 42256    |
| train/             |          |
|    actor_loss      | 2.4      |
|    critic_loss     | 17.8     |
|    learning_rate   | 0.001    |
|    n_updates       | 41255    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 510      |
|    ep_rew_mean     | -140     |
| time/              |          |
|    episodes        | 88       |
|    fps             | 231      |
|    time_elapsed    | 193      |
|    total_timesteps | 44906    |
| train/             |          |
|    actor_los

                                                                             

MoviePy - Done !
MoviePy - video ready /Users/mehulxyz/dev/lunarLander/lander_td3/logs/videos/train_lander-step-60000-to-step-70000.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 574      |
|    ep_rew_mean     | 10.4     |
| time/              |          |
|    episodes        | 136      |
|    fps             | 225      |
|    time_elapsed    | 311      |
|    total_timesteps | 70153    |
| train/             |          |
|    actor_loss      | -5.52    |
|    critic_loss     | 4.47     |
|    learning_rate   | 0.001    |
|    n_updates       | 69152    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 546      |
|    ep_rew_mean     | 22.6     |
| time/              |          |
|    episodes        | 140      |
|    fps             | 226      |
|    time_elapsed    | 313      |
|    total_timesteps | 70985    |
| train/             |          |
|    actor_los

                                                                             

MoviePy - Done !
MoviePy - video ready /Users/mehulxyz/dev/lunarLander/lander_td3/logs/videos/train_lander-step-90000-to-step-100000.mp4
Evaluation reward: 201.04
MoviePy - Building video /Users/mehulxyz/dev/lunarLander/lander_td3/logs/videos/test_lander-step-0-to-step-600.mp4.
MoviePy - Writing video /Users/mehulxyz/dev/lunarLander/lander_td3/logs/videos/test_lander-step-0-to-step-600.mp4



                                                              

MoviePy - Done !
MoviePy - video ready /Users/mehulxyz/dev/lunarLander/lander_td3/logs/videos/test_lander-step-0-to-step-600.mp4




GUI Evaluation reward: -15.44
