In [261]:
import os
import gymnasium as gym
import matplotlib.pyplot as plt 
# %matplotlib inline
# os.environ["SDL_VIDEODRIVER"] = "dummy"
from IPython import display

In [262]:
import ffmpegio
import numpy as np

In [263]:
from stable_baselines3 import A2C
# Algorithm class used throughout the notebook. To try another algorithm,
# change only this alias (and its import above).
Algo = A2C
# Human-readable tag used in the output directory layout and checkpoint names.
# Derived from the class itself so it can never disagree with the alias above.
Algo.name = Algo.__name__

In [264]:
# Environment id, defined once so that gym.make and the output directory
# layout built from `env_name` cannot drift apart.
env_name = "LunarLander-v2"

# Training environment: discrete actions, wind disabled, rendering to RGB arrays.
env = gym.make(
    env_name,
    continuous=False,
    gravity=-10.0,
    enable_wind=False,
    wind_power=15.0,  # presumably inert while enable_wind is False - confirm in the env docs
    turbulence_power=1.5,
    render_mode="rgb_array",
)

# Seeded reset so the first observation is the same on every run.
observation, info = env.reset(seed=42)

In [265]:
# Output locations, one sub-tree per environment / algorithm pair.
models_dir = f"models/{env_name}/{Algo.name}"
logdir = f"logs/{env_name}/{Algo.name}"
imgs_dir = f"imgs/{env_name}/{Algo.name}"

# Create each output directory; ones that already exist are left untouched.
for out_dir in (models_dir, logdir, imgs_dir):
    os.makedirs(out_dir, exist_ok=True)

## Print Agent Information

In [266]:
# Report the raw environment's observation space, then one randomly drawn observation.
print("Observation Space: ", env.observation_space)
print("Sample Observation", env.observation_space.sample())


Observation Space:  Box([-90.        -90.         -5.         -5.         -3.1415927  -5.
  -0.         -0.       ], [90.        90.         5.         5.         3.1415927  5.
  1.         1.       ], (8,), float32)
Sample Observation [36.16341    -6.052117    3.8429792   1.1116247  -3.0068586  -2.9883487
  0.8316981   0.89760166]


In [267]:

# Report the raw environment's action space, then one randomly drawn action.
print("Action Space       ", env.action_space)
print("Action Space Sample ", env.action_space.sample())


Action Space        Discrete(4)
Action Space Sample  3


In [268]:
# Build the agent with an MLP policy on top of `env`; training metrics are
# logged under `logdir` for TensorBoard.
# A fixed seed (matching the seeded env.reset above) makes training runs
# repeatable instead of varying from one kernel restart to the next.
model = Algo("MlpPolicy", env, verbose=0, tensorboard_log=logdir, seed=42)
# The environment as the model holds it (presumably wrapped into a vectorized
# env by the library - see the Stable-Baselines3 docs).
vec_env = model.get_env()

In [269]:
# Same report as for the raw env, but for the environment held by the model.
print("Observation Space: ", vec_env.observation_space)
print("Sample Observation", vec_env.observation_space.sample())

Observation Space:  Box([-90.        -90.         -5.         -5.         -3.1415927  -5.
  -0.         -0.       ], [90.        90.         5.         5.         3.1415927  5.
  1.         1.       ], (8,), float32)
Sample Observation [ 8.0035486e+00 -8.5507858e+01 -5.6614917e-02 -1.4018070e+00
  3.0723724e+00 -7.4252683e-01  8.5598242e-01  2.8504422e-01]


In [270]:
# Action space of the environment held by the model, plus one random action.
print("Action Space       ", vec_env.action_space)
print("Action Space Sample ", vec_env.action_space.sample())

Action Space        Discrete(4)
Action Space Sample  2


## Train and Save Models

In [271]:
# Index of the first training run; the training loop numbers its TensorBoard
# runs and saved checkpoints starting from this value.
last_run = 1

In [272]:
# Timesteps requested from each model.learn() call.
timesteps = 40_000
# Five consecutive training rounds; a checkpoint is written after every round.
for i in range(last_run, last_run + 5):
    run_id = f"{i:04d}"  # zero-padded run index, e.g. 0001
    # reset_num_timesteps=False keeps the model's step counter running across rounds.
    model.learn(
        total_timesteps=timesteps,
        reset_num_timesteps=False,
        tb_log_name=f"run_{run_id}",
    )
    checkpoint_name = f"{Algo.name}_{run_id}"
    model.save(f"{models_dir}/{checkpoint_name}")
    # Remember the latest checkpoint that was actually saved; it is loaded for evaluation.
    choosen_model_name = checkpoint_name

## Show What's Learned

In [273]:
# Number of episodes to run the trained agent for (only the last one is recorded)
num_episodes = 2

In [274]:
# Build a fresh environment for evaluating the trained agent. It uses
# render_mode="rgb_array" (not "human"), so frames are obtained from
# env.render() and can be written to a video file.
# We could also change some parameters of the environment here to check the
# robustness of the agent.
env = gym.make(
    env_name,  # same id the model directories were derived from
    continuous=False,
    gravity=-10.0,
    enable_wind=False,
    wind_power=15.0,
    turbulence_power=1.5,
    render_mode="rgb_array",
)

In [275]:
# Restore the checkpoint saved last by the training loop and bind it to the
# evaluation environment.
checkpoint_path = f"{models_dir}/{choosen_model_name}"
choosen_model = Algo.load(checkpoint_path, env=env)

In [276]:
# Roll out the loaded policy for `num_episodes` episodes. Frames are captured
# for the final episode only; they are turned into a video afterwards.
frames = []  # defined up front so it exists even when num_episodes == 0
for ep in range(num_episodes):
    obs, info = env.reset()
    term = trunc = False
    record = ep == num_episodes - 1
    # An episode is over once it is either terminated or truncated.
    # (The previous condition `not term or trunc` parsed as `(not term) or trunc`
    # and only stopped because of an explicit break inside the loop.)
    while not (term or trunc):
        action, _state = choosen_model.predict(obs, deterministic=True)
        obs, reward, term, trunc, info = env.step(action)
        if record:
            frames.append(env.render())

In [277]:
# Encode the captured frames into an MP4 next to the other outputs and embed
# it in the notebook. The second argument to ffmpegio.video.write is the frame rate.
filename = f"./{imgs_dir}/{choosen_model_name}.mp4"
ffmpegio.video.write(filename, 30, np.array(frames), overwrite=True, show_log=True)
# The src attribute is quoted so that a path containing spaces still yields valid HTML.
display.HTML(f'<video alt="test" controls><source src="{filename}" type="video/mp4"></video>')

In [278]:
# Close the evaluation environment now that the rollout and recording are done.
env.close()