In [4]:
import gymnasium as gym
import numpy as np

from stable_baselines3 import DQN

In [3]:
# Build a DQN agent with an MLP policy. Handing over the environment id
# (instead of an env instance) leaves environment creation and wrapping to
# stable-baselines3, as reported in the log output of this cell.
model = DQN(
    policy="MlpPolicy",
    env="LunarLander-v3",
    # Exploration schedule ends at epsilon = 0.1.
    exploration_final_eps=0.1,
    # Target network is refreshed every 250 steps.
    target_update_interval=250,
    verbose=1,
)

Using cpu device
Creating environment from the given name 'LunarLander-v3'
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [2]:
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor

# Fixed seed so that repeated training runs are comparable.
SEED = 42

# Separate environments for training and for periodic evaluation.
train_env = gym.make("LunarLander-v3")

# Wrap the evaluation environment in Monitor explicitly. The training
# environment is wrapped by the algorithm itself, but the one handed to
# EvalCallback is not, and unmonitored evaluation triggers a warning and
# reports episode statistics as seen through raw env.step calls.
eval_env = Monitor(gym.make("LunarLander-v3"))

# Periodic evaluation: every `eval_freq` callback steps, run 20 deterministic
# episodes, log the results under ./logs/ and keep the best-scoring model
# under ./best_model/.
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path="./best_model/",
    log_path="./logs/",
    eval_freq=10000,
    n_eval_episodes=20,
    deterministic=True,
    render=False,
)

# Create the agent; the seed makes the run reproducible.
model = DQN("MlpPolicy", train_env, verbose=1, seed=SEED)

try:
    model.learn(total_timesteps=200_000, callback=eval_callback)

    # Also keep the final weights (the best checkpoint is saved by the callback).
    model.save("dqn_lunar")
finally:
    # Release both environments even if training is interrupted.
    train_env.close()
    eval_env.close()


In [5]:
from stable_baselines3 import DQN
import gymnasium as gym
import imageio
import numpy as np

# Number of frames to record and the output file.
N_FRAMES = 1000
video_path = "lunar_lander_dqn.mp4"

# Load the best checkpoint written during training.
model = DQN.load("./best_model/best_model")

# "rgb_array" makes env.render() return the frame instead of opening a window.
env = gym.make("LunarLander-v3", render_mode="rgb_array")

try:
    # Use the frame rate the environment advertises; fall back to 50 (the
    # value previously hard-coded here) if the metadata does not provide one.
    fps = env.metadata.get("render_fps", 50)
    writer = imageio.get_writer(video_path, fps=fps, format='FFMPEG')
    try:
        obs, _ = env.reset()

        for _ in range(N_FRAMES):
            # Record the current state before acting on it.
            # asarray with dtype avoids a copy when the frame is already uint8.
            frame = np.asarray(env.render(), dtype=np.uint8)
            writer.append_data(frame)

            action, _ = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, _ = env.step(action)

            # Start a new episode so the recording always spans N_FRAMES frames.
            if terminated or truncated:
                obs, _ = env.reset()
    finally:
        # Always finalize the video file, even if the rollout fails midway.
        writer.close()
finally:
    # Always release the environment.
    env.close()

print(f"🎥 Video saved as '{video_path}'")




🎥 Video saved as 'lunar_lander_dqn.mp4'
