In [None]:
!pip install swig cmake stable-baselines3 gymnasium huggingface_sb3

In [1]:
import swig

In [2]:
import gymnasium

from huggingface_sb3 import load_from_hub, package_to_hub
from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

In [15]:
!pip install tqdm


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import gymnasium as gym

In [4]:
env = gym.make('LunarLander-v3')

In [None]:
!pip install "gymnasium[box2d]"

In [5]:
env = gym.make('LunarLander-v3')
observation, info = env.reset()
for _ in range(20):
    action = env.action_space.sample()
    print("Action taken: ", action)
    observation, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        env.reset()

env.close()

Action taken:  0
Action taken:  1
Action taken:  2
Action taken:  3
Action taken:  0
Action taken:  1
Action taken:  0
Action taken:  3
Action taken:  2
Action taken:  3
Action taken:  3
Action taken:  3
Action taken:  2
Action taken:  1
Action taken:  1
Action taken:  3
Action taken:  0
Action taken:  3
Action taken:  3
Action taken:  2


In [6]:
env = gym.make('LunarLander-v3')
env.reset()
print(env.observation_space.shape)
print(env.observation_space.sample())

(8,)
[ 6.0606297e-02  7.1379337e-03  5.8026123e+00 -8.7129183e+00
 -2.1987245e+00 -1.8236195e-01  8.3468002e-01  8.5315770e-01]


In [7]:
env = make_vec_env('LunarLander-v3', n_envs=16)

In [8]:
env = gym.make('LunarLander-v3')
model = PPO(
    'MlpPolicy',
    env,
    verbose=1,
    n_steps=1024,
    batch_size=64,
    n_epochs=4,
    gamma=0.99,
    gae_lambda=0.98,
    ent_coef=0.01,
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [9]:
model_name = "ppo-LunarLander-v3-subhojit"

In [10]:
model.learn(total_timesteps=int(1e5))
model.save(model_name)

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 83.5     |
|    ep_rew_mean     | -174     |
| time/              |          |
|    fps             | 7503     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 1024     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 87.2         |
|    ep_rew_mean          | -205         |
| time/                   |              |
|    fps                  | 5720         |
|    iterations           | 2            |
|    time_elapsed         | 0            |
|    total_timesteps      | 2048         |
| train/                  |              |
|    approx_kl            | 0.0012510428 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.39        |
|    explained_variance   | -0.00288     |
|    learning_r

In [11]:
#@title
eval_env = Monitor(gym.make("LunarLander-v3", render_mode='rgb_array'))
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

mean_reward=-239.48 +/- 38.60018181361497


In [None]:
!pip install ipywidgets

In [12]:
notebook_login()
!git config --global credential.helper store

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [16]:
import gymnasium as gym

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env

from huggingface_sb3 import package_to_hub

# PLACE the variables you've just defined two cells above
# Define the name of the environment
env_id = "LunarLander-v3"

# TODO: Define the model architecture we used
model_architecture = "PPO"

## Define a repo_id
## repo_id is the id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2
## CHANGE WITH YOUR REPO ID
repo_id = "subhojitdss/lunarlander-exercise" # Change with your repo id, you can't push with mine 😄

## Define the commit message
commit_message = "Upload PPO LunarLander-v3 trained agent"

# Create the evaluation env and set the render_mode="rgb_array"
eval_env = DummyVecEnv([lambda: gym.make(env_id, render_mode="rgb_array")])

# PLACE the package_to_hub function you've just filled here
package_to_hub(model=model, # Our trained model
               model_name=model_name, # The name of our trained model
               model_architecture=model_architecture, # The model architecture we used: in our case PPO
               env_id=env_id, # Name of the environment
               eval_env=eval_env, # Evaluation Environment
               repo_id=repo_id, # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2
               commit_message=commit_message)


[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m
Saving video to /var/folders/5g/d5t9nrnd47z7b7djysv3l9mm0000gq/T/tmp80889oms/-step-0-to-step-1000.mp4
MoviePy - Building video /var/folders/5g/d5t9nrnd47z7b7djysv3l9mm0000gq/T/tmp80889oms/-step-0-to-step-1000.mp4.
[38;5;1m✘ 'tqdm' object has no attribute 'container'[0m
[38;5;1m✘ We are unable to generate a replay of your agent, the package_to_hub
process continues[0m
[38;5;1m✘ Please open an issue at
https://github.com/huggingface/huggingface_sb3/issues[0m
[38;5;4mℹ Pushing repo subhojitdss/lunarlander-exercise to the Hugging Face
Hub[0m


Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

RuntimeError: Error while uploading 'ppo-LunarLander-v3-subhojit/policy.optimizer.pth' to the Hub.

In [None]:
!pip install 'gymnasium[other]'

In [17]:
import tqdm