In [1]:
import gymnasium

from huggingface_sb3 import load_from_hub, package_to_hub
from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

## Learning Environment

This section explores how the Gymnasium library works. Gymnasium provides ready-made environments in which reinforcement learning (RL) agents can be trained and tested.

In [2]:
import gymnasium as gym

# Build a single LunarLander environment and reset it once before inspecting it.
# The value returned by reset() is intentionally ignored in this cell.
env = gym.make("LunarLander-v2")
env.reset()

# Look at what an observation looks like: its shape and one random draw
# from the observation space.
obs_space = env.observation_space
print("_____OBSERVATION SPACE_____ \n")
print("Observation Space Shape", obs_space.shape)
print("Sample observation", obs_space.sample())  # random draw from the space

_____OBSERVATION SPACE_____ 

Observation Space Shape (8,)
Sample observation [-5.9849072e+01  5.7347160e+01  4.5207868e+00  1.8485782e+00
 -4.9732834e-02 -1.3853170e+00  1.3320468e-01  8.0533069e-01]


In [3]:
# Look at the actions available to the agent.
action_space = env.action_space
print("\n _____ACTION SPACE_____ \n")
# NOTE: despite the "Shape" label, `.n` is printed here, i.e. the size of the
# discrete action set (see the Gymnasium `Discrete` space documentation).
print("Action Space Shape", action_space.n)
print("Action Space Sample", action_space.sample())  # one randomly chosen action


 _____ACTION SPACE_____ 

Action Space Shape 4
Action Space Sample 1


In [4]:
# Begin a fresh episode before stepping through the environment.
observation, info = env.reset()

# Drive the environment with 20 randomly sampled actions (no policy involved).
for _ in range(20):
    action = env.action_space.sample()
    print("Action taken: ", action)

    # Apply the action and collect the step results.
    observation, reward, terminated, truncated, info = env.step(action)

    # When either end-of-episode flag is set, start a new episode so the
    # remaining iterations still have a live episode to step.
    if truncated or terminated:
        print("Environment reset")
        observation, info = env.reset()

# Done with this single environment; release it.
env.close()

Action taken:  2
Action taken:  1
Action taken:  1
Action taken:  3
Action taken:  2
Action taken:  1
Action taken:  2
Action taken:  1
Action taken:  1
Action taken:  3
Action taken:  1
Action taken:  1
Action taken:  3
Action taken:  3
Action taken:  3
Action taken:  1
Action taken:  3
Action taken:  1
Action taken:  0
Action taken:  2


Create a vectorized environment, which stacks multiple independent environments into a single one. This way, the agent collects more diverse experiences during training.

In [5]:
# Build the training environment: 16 LunarLander instances wrapped as one
# vectorized environment. This rebinds `env`, which previously referred to the
# single environment created (and closed) in the earlier cells.
env = make_vec_env(env_id="LunarLander-v2", n_envs=16)

## Model

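We use the PPO (Proximal Policy Optimization) implementation from Stable-Baselines3 as our model. PPO is an actor-critic method: it combines policy-based RL (the actor) with value-based RL (the critic).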

In [6]:
from stable_baselines3 import PPO

# Instantiate the PPO agent on the current `env` with an MLP policy.
# The meaning of each hyperparameter below is described in the
# Stable-Baselines3 PPO reference.
model = PPO(
    policy="MlpPolicy",
    env=env,
    n_steps=1024,
    batch_size=64,
    n_epochs=4,
    gamma=0.999,
    gae_lambda=0.98,
    ent_coef=0.01,
    verbose=1,
)

Using cuda device
