In [None]:
import gymnasium as gym
from stable_baselines3 import DQN
import matplotlib.pyplot as plt
from matplotlib import animation
import numpy as np
import os

# Seed shared by the agent and the first evaluation reset so that reruns of
# this cell are repeatable (bit-exact results across hardware are not promised).
SEED = 0

# NOTE(review): newer Gymnasium releases replace 'LunarLander-v2' with
# 'LunarLander-v3' — confirm the id against the installed version.
env = gym.make('LunarLander-v2', render_mode="rgb_array")

try:
    # Train a DQN agent with the MLP policy for 50k environment steps.
    model = DQN("MlpPolicy", env, verbose=1, seed=SEED)
    model.learn(total_timesteps=50000)

    # Round-trip through disk so the checkpoint on disk is the one evaluated.
    model.save("lander-dqn2")
    model = DQN.load("lander-dqn2", env=env)

    # Run the deterministic policy for 2000 steps, keeping one frame per step.
    obs, _ = env.reset(seed=SEED)
    frames = []
    for _ in range(2000):
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = env.step(action)

        frame = env.render()
        frames.append(frame)

        # Start a fresh episode so recording continues for the full step budget.
        if terminated or truncated:
            obs, _ = env.reset()
finally:
    # Always release the environment, even if training or the rollout raises.
    env.close()

def create_video(frames, filename='lander-dqn2.mp4', fps=30):
    """Encode a sequence of rendered frames into a video file.

    The first frame is drawn with ``imshow``; a ``FuncAnimation`` then swaps
    each subsequent frame into that image and the animation is saved to
    ``filename``, forwarding ``-vcodec libx264`` to the movie writer.

    Args:
        frames: Non-empty, indexable sequence of images. Each element is
            handed to ``imshow`` / ``set_array`` unchanged.
        filename: Path of the video file to write.
        fps: Frames per second of the saved video; also used to derive the
            animation interval in milliseconds.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    # Fail early with a clear message instead of an IndexError on frames[0].
    if len(frames) == 0:
        raise ValueError("frames must contain at least one image")

    fig = plt.figure()
    try:
        im = plt.imshow(frames[0])

        def update_frame(i):
            # Index the sequence directly: stacking every frame into one big
            # array first would duplicate the whole recording in memory.
            im.set_array(frames[i])
            return [im]

        ani = animation.FuncAnimation(fig, update_frame, frames=len(frames), interval=1000/fps)

        ani.save(filename, fps=fps, extra_args=['-vcodec', 'libx264'])
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

# Write the recorded DQN rollout to disk; the output name and frame rate are
# spelled out here rather than left to the function defaults.
create_video(frames, filename='lander-dqn2.mp4', fps=30)

In [None]:
import gymnasium as gym
from stable_baselines3 import A2C
import matplotlib.pyplot as plt
from matplotlib import animation
import numpy as np
import os

# Seed shared by the agent and the first evaluation reset so that reruns of
# this cell are repeatable (bit-exact results across hardware are not promised).
SEED = 0

# NOTE(review): newer Gymnasium releases replace 'LunarLander-v2' with
# 'LunarLander-v3' — confirm the id against the installed version.
env = gym.make('LunarLander-v2', render_mode="rgb_array")

try:
    # Train an A2C agent with the MLP policy for 50k environment steps.
    model = A2C("MlpPolicy", env, verbose=1, seed=SEED)
    model.learn(total_timesteps=50000)

    # Round-trip through disk so the checkpoint on disk is the one evaluated.
    model.save("lander2-a2c")
    model = A2C.load("lander2-a2c", env=env)

    # Run the deterministic policy for 2000 steps, keeping one frame per step.
    obs, _ = env.reset(seed=SEED)
    frames = []
    for _ in range(2000):
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = env.step(action)

        frame = env.render()
        frames.append(frame)

        # Start a fresh episode so recording continues for the full step budget.
        if terminated or truncated:
            obs, _ = env.reset()
finally:
    # Always release the environment, even if training or the rollout raises.
    env.close()

def create_video(frames, filename='lander2-a2c.mp4', fps=30):
    """Encode a sequence of rendered frames into a video file.

    The first frame is drawn with ``imshow``; a ``FuncAnimation`` then swaps
    each subsequent frame into that image and the animation is saved to
    ``filename``, forwarding ``-vcodec libx264`` to the movie writer.

    Args:
        frames: Non-empty, indexable sequence of images. Each element is
            handed to ``imshow`` / ``set_array`` unchanged.
        filename: Path of the video file to write.
        fps: Frames per second of the saved video; also used to derive the
            animation interval in milliseconds.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    # Fail early with a clear message instead of an IndexError on frames[0].
    if len(frames) == 0:
        raise ValueError("frames must contain at least one image")

    fig = plt.figure()
    try:
        im = plt.imshow(frames[0])

        def update_frame(i):
            # Index the sequence directly: stacking every frame into one big
            # array first would duplicate the whole recording in memory.
            im.set_array(frames[i])
            return [im]

        ani = animation.FuncAnimation(fig, update_frame, frames=len(frames), interval=1000/fps)

        ani.save(filename, fps=fps, extra_args=['-vcodec', 'libx264'])
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

# Write the recorded A2C rollout to disk; the output name and frame rate are
# spelled out here rather than left to the function defaults.
create_video(frames, filename='lander2-a2c.mp4', fps=30)

In [None]:
import gymnasium as gym
from stable_baselines3 import PPO
import matplotlib.pyplot as plt
from matplotlib import animation
import numpy as np
import os

# Seed shared by the agent and the first evaluation reset so that reruns of
# this cell are repeatable (bit-exact results across hardware are not promised).
SEED = 0

# NOTE(review): newer Gymnasium releases replace 'LunarLander-v2' with
# 'LunarLander-v3' — confirm the id against the installed version.
env = gym.make('LunarLander-v2', render_mode="rgb_array")

try:
    # Train a PPO agent with the MLP policy for 50k environment steps.
    model = PPO("MlpPolicy", env, verbose=1, seed=SEED)
    model.learn(total_timesteps=50000)

    # Round-trip through disk so the checkpoint on disk is the one evaluated.
    model.save("lander2-ppo")
    model = PPO.load("lander2-ppo", env=env)

    # Run the deterministic policy for 2000 steps, keeping one frame per step.
    obs, _ = env.reset(seed=SEED)
    frames = []
    for _ in range(2000):
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = env.step(action)

        frame = env.render()
        frames.append(frame)

        # Start a fresh episode so recording continues for the full step budget.
        if terminated or truncated:
            obs, _ = env.reset()
finally:
    # Always release the environment, even if training or the rollout raises.
    env.close()

def create_video(frames, filename='lander2-ppo.mp4', fps=30):
    """Encode a sequence of rendered frames into a video file.

    The first frame is drawn with ``imshow``; a ``FuncAnimation`` then swaps
    each subsequent frame into that image and the animation is saved to
    ``filename``, forwarding ``-vcodec libx264`` to the movie writer.

    Args:
        frames: Non-empty, indexable sequence of images. Each element is
            handed to ``imshow`` / ``set_array`` unchanged.
        filename: Path of the video file to write.
        fps: Frames per second of the saved video; also used to derive the
            animation interval in milliseconds.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    # Fail early with a clear message instead of an IndexError on frames[0].
    if len(frames) == 0:
        raise ValueError("frames must contain at least one image")

    fig = plt.figure()
    try:
        im = plt.imshow(frames[0])

        def update_frame(i):
            # Index the sequence directly: stacking every frame into one big
            # array first would duplicate the whole recording in memory.
            im.set_array(frames[i])
            return [im]

        ani = animation.FuncAnimation(fig, update_frame, frames=len(frames), interval=1000/fps)

        ani.save(filename, fps=fps, extra_args=['-vcodec', 'libx264'])
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

# Write the recorded PPO rollout to disk; the output name and frame rate are
# spelled out here rather than left to the function defaults.
create_video(frames, filename='lander2-ppo.mp4', fps=30)