In [3]:
import gymnasium

from huggingface_sb3 import load_from_hub, package_to_hub
from huggingface_hub import (
    notebook_login,
)  # To log to our Hugging Face account to be able to upload models to the Hub.

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

In [4]:
import gymnasium as gym 

# Creating the environment
env = gym.make("LunarLander-v3", render_mode="human")

# Resetting the enironment
observation, info = env.reset()

for _ in range(200):
    # Take a random action 
    action = env.action_space.sample()
    print("Action taken: ", action)

    # Apply this action in the env
    observation, reward, terminated, truncated, info = env.step(action)
    env.render()

    if terminated or truncated:
        print("Environment is reset")
        observation, info = env.reset()

env.close()

  from pkg_resources import resource_stream, resource_exists


Action taken:  3
Action taken:  0
Action taken:  2
Action taken:  1
Action taken:  3
Action taken:  1
Action taken:  1
Action taken:  2
Action taken:  1
Action taken:  0
Action taken:  3
Action taken:  3
Action taken:  2
Action taken:  0
Action taken:  1
Action taken:  0
Action taken:  2
Action taken:  2
Action taken:  0
Action taken:  0
Action taken:  1
Action taken:  0
Action taken:  3
Action taken:  0
Action taken:  1
Action taken:  2
Action taken:  3
Action taken:  1
Action taken:  3
Action taken:  1
Action taken:  3
Action taken:  3
Action taken:  1
Action taken:  2
Action taken:  1
Action taken:  0
Action taken:  3
Action taken:  0
Action taken:  2
Action taken:  2
Action taken:  0
Action taken:  2
Action taken:  2
Action taken:  3
Action taken:  2
Action taken:  2
Action taken:  0
Action taken:  1
Action taken:  2
Action taken:  0
Action taken:  3
Action taken:  0
Action taken:  1
Action taken:  0
Action taken:  3
Action taken:  3
Action taken:  3
Action taken:  2
Action taken: 

In [5]:
env = gym.make("LunarLander-v3")
env.reset()

print("___OBS SPACE___\n")
print("Observation space shape: ", env.observation_space.shape)
print("Sample Observation space: ", env.observation_space.sample())

___OBS SPACE___

Observation space shape:  (8,)
Sample Observation space:  [-2.2554545  -1.4047152  -0.60978454 -5.0573773  -3.5330036   5.566765
  0.6627417   0.24428055]


In [6]:
print("\n _____ACTION SPACE_____ \n")
print("Action Space Shape", env.action_space.n)
print("Action Space Sample", env.action_space.sample())  # Take a random action


 _____ACTION SPACE_____ 

Action Space Shape 4
Action Space Sample 3


In [7]:
env = make_vec_env("LunarLander-v3", n_envs=16)

# Setting up StableBaseline 4 Training

In [9]:
import tensorboard
env = make_vec_env("LunarLander-v3", n_envs=16)

# Instantiate the agent
model = PPO('MlpPolicy', 
            env=env, 
            tensorboard_log="./LunarLander-v3_tensorboard/",
            n_steps=1024, 
            batch_size=64, 
            n_epochs=4, 
            gamma=0.999, 
            gae_lambda=0.98, 
            ent_coef=0.01,
            verbose=1)

# Train the agent
model.learn(total_timesteps=int(1e6), progress_bar=True)

# Save Model to file
model.save("my_LunarLander-v2_model")

# Evaluation

In [11]:
eval_env = gym.make("LunarLander-v3")
eval_env = Monitor(eval_env)

trained_model = PPO.load("my_LunarLander-v2_model", env=eval_env)

mean_reward, std_reward = evaluate_policy(trained_model, trained_model.get_env(), n_eval_episodes=10, deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

# Graphical Visualisation

In [13]:
# Creating the environment
env = gym.make("LunarLander-v3", render_mode="human")
trained_model = PPO.load("my_LunarLander-v2_model", env=eval_env)

# Resetting the enironment
observation, info = env.reset()

for _ in range(200):
    # Take a random action 
    action, state = trained_model.predict(observation, )
    
    print("Action taken: ", action)

    # Apply this action in the env
    observation, reward, terminated, truncated, info = env.step(action)
    env.render()

    if terminated or truncated:
        print("Environment is reset")
        observation, info = env.reset()

Wrapping the env in a DummyVecEnv.
Action taken:  0
Action taken:  2
Action taken:  2
Action taken:  3
Action taken:  2
Action taken:  2
Action taken:  2
Action taken:  1
Action taken:  3
Action taken:  2
Action taken:  2
Action taken:  0
Action taken:  2
Action taken:  2
Action taken:  1
Action taken:  2
Action taken:  3
Action taken:  2
Action taken:  2
Action taken:  2
Action taken:  1
Action taken:  3
Action taken:  3
Action taken:  2
Action taken:  2
Action taken:  3
Action taken:  2
Action taken:  2
Action taken:  1
Action taken:  2
Action taken:  1
Action taken:  3
Action taken:  2
Action taken:  2
Action taken:  2
Action taken:  2
Action taken:  2
Action taken:  2
Action taken:  0
Action taken:  2
Action taken:  2
Action taken:  2
Action taken:  2
Action taken:  2
Action taken:  2
Action taken:  2
Action taken:  2
Action taken:  2
Action taken:  2
Action taken:  1
Action taken:  2
Action taken:  3
Action taken:  2
Action taken:  2
Action taken:  1
Action taken:  0
Action taken: