In [1]:
import gymnasium as gym

from huggingface_sb3 import load_from_hub, package_to_hub
from huggingface_hub import (
    notebook_login,
)  # To log to our Hugging Face account to be able to upload models to the Hub.

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import gymnasium as gym

# Creating the environment. With render_mode="human" the window is refreshed
# as part of reset()/step(), so no explicit env.render() call is needed.
env = gym.make("LunarLander-v3", render_mode="human")

try:
    # Resetting the environment
    observation, info = env.reset()

    for _ in range(200):
        # Take a random action
        action = env.action_space.sample()
        print("Action taken: ", action)

        # Apply this action in the env
        observation, reward, terminated, truncated, info = env.step(action)

        # Start a fresh episode as soon as the current one ends
        if terminated or truncated:
            print("Environment is reset")
            observation, info = env.reset()
finally:
    # Always release the render window, even if the loop raises or is interrupted
    env.close()

  from pkg_resources import resource_stream, resource_exists


Action taken:  3
Action taken:  0
Action taken:  2
Action taken:  1
Action taken:  3
Action taken:  1
Action taken:  1
Action taken:  2
Action taken:  1
Action taken:  0
Action taken:  3
Action taken:  3
Action taken:  2
Action taken:  0
Action taken:  1
Action taken:  0
Action taken:  2
Action taken:  2
Action taken:  0
Action taken:  0
Action taken:  1
Action taken:  0
Action taken:  3
Action taken:  0
Action taken:  1
Action taken:  2
Action taken:  3
Action taken:  1
Action taken:  3
Action taken:  1
Action taken:  3
Action taken:  3
Action taken:  1
Action taken:  2
Action taken:  1
Action taken:  0
Action taken:  3
Action taken:  0
Action taken:  2
Action taken:  2
Action taken:  0
Action taken:  2
Action taken:  2
Action taken:  3
Action taken:  2
Action taken:  2
Action taken:  0
Action taken:  1
Action taken:  2
Action taken:  0
Action taken:  3
Action taken:  0
Action taken:  1
Action taken:  0
Action taken:  3
Action taken:  3
Action taken:  3
Action taken:  2
Action taken: 

In [5]:
# Build a LunarLander instance (no render mode requested) to inspect its observation space.
env = gym.make("LunarLander-v3")
env.reset()

obs_space = env.observation_space

print("___OBS SPACE___\n")
print("Observation space shape: ", obs_space.shape)
# sample() draws a random point from the space, not an observation from the simulator
print("Sample Observation space: ", obs_space.sample())

___OBS SPACE___

Observation space shape:  (8,)
Sample Observation space:  [-2.2554545  -1.4047152  -0.60978454 -5.0573773  -3.5330036   5.566765
  0.6627417   0.24428055]


In [6]:
# Inspect the action space of the `env` created in the previous cell.
action_space = env.action_space

print("\n _____ACTION SPACE_____ \n")
# `.n` is the value printed under the "Shape" label
print("Action Space Shape", action_space.n)
print("Action Space Sample", action_space.sample())  # Take a random action


 _____ACTION SPACE_____ 

Action Space Shape 4
Action Space Sample 3


In [7]:
# Rebind `env` to a vectorized environment made of 16 LunarLander copies.
env = make_vec_env(env_id="LunarLander-v3", n_envs=16)

# Setting up Stable-Baselines3 for Training

In [None]:
# NOTE(review): `tensorboard` is not referenced below; presumably imported so the cell
# fails immediately when TensorBoard logging support is missing -- confirm.
import tensorboard

# Fixed seed so that the stochastic training run can be reproduced on a re-run
SEED = 42

env = make_vec_env("LunarLander-v3", n_envs=16, seed=SEED)

# Instantiate the agent
model = PPO('MlpPolicy',
            env=env,
            tensorboard_log="./LunarLander-v3_tensorboard/",
            n_steps=1024,
            batch_size=64,
            n_epochs=4,
            gamma=0.999,
            gae_lambda=0.98,
            ent_coef=0.01,
            seed=SEED,
            verbose=1)

# Train the agent
model.learn(total_timesteps=int(1e6), progress_bar=True)

# Save Model to file (later cells reload it from this path)
model.save("my_LunarLander-v3_model")

# Evaluation

In [2]:
# Create the evaluation environment and wrap it in Monitor in a single step.
eval_env = Monitor(gym.make("LunarLander-v3"))

# Reload the agent saved by the training cell and attach the evaluation environment.
trained_model = PPO.load("my_LunarLander-v3_model", env=eval_env)

# Evaluate on the environment held by the model (the vectorized wrapper around eval_env).
mean_reward, std_reward = evaluate_policy(
    trained_model,
    trained_model.get_env(),
    n_eval_episodes=10,
    deterministic=True,
)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

  from pkg_resources import resource_stream, resource_exists


Wrapping the env in a DummyVecEnv.
mean_reward=246.93 +/- 22.105188547978273


# Graphical Visualisation

In [None]:
# Creating the environment. With render_mode="human" the window is refreshed
# as part of reset()/step(), so no explicit env.render() call is needed.
env = gym.make("LunarLander-v3", render_mode="human")

# Only predict() is used below, so the model is loaded without an attached
# environment; this also removes the dependency on `eval_env` from another cell.
trained_model = PPO.load("my_LunarLander-v3_model")

try:
    # Resetting the environment
    observation, info = env.reset()

    for _ in range(500):
        # Ask the trained policy for its action (deterministic, as in the evaluation cells)
        action, state = trained_model.predict(observation, deterministic=True)

        # Apply this action in the env
        observation, reward, terminated, truncated, info = env.step(action)

        # Show a single episode only
        if terminated or truncated:
            print("Episode is finished")
            break
finally:
    # Always release the render window, even if the loop raises or is interrupted
    env.close()

Wrapping the env in a DummyVecEnv.
Episode is finished


In [16]:
import gymnasium as gym
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor

from huggingface_sb3 import package_to_hub

# Target repository on the Hugging Face Hub and the environment the agent was trained on
repo_id = "Pucciland95/ppo-LunarLander-v3"
env_id = "LunarLander-v3"

# Evaluation environment handed to package_to_hub. It is wrapped in Monitor, like the
# other evaluation cells of this notebook, and uses "rgb_array" rendering so frames can
# be captured for the replay video.
eval_env = DummyVecEnv([lambda: Monitor(gym.make(env_id, render_mode="rgb_array"))])

model_architecture = "PPO"
commit_message = "Uploaded PPO LunarLander-v3 trained agent"

# Reload the agent saved by the training cell
model = PPO.load("my_LunarLander-v3_model", env=eval_env)

# NOTE: model_name determines the name of the uploaded archive; the hub-loading cell
# refers to it as "<model_name>.zip", so keep the two in sync.
package_to_hub(model=model,
               model_name="ChopChopMotherFucker",
               model_architecture=model_architecture,
               env_id=env_id,
               eval_env=eval_env,
               repo_id=repo_id,
               commit_message=commit_message)

[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m




Saving video to /tmp/tmpu93ohzk_/-step-0-to-step-1000.mp4
MoviePy - Building video /tmp/tmpu93ohzk_/-step-0-to-step-1000.mp4.
MoviePy - Writing video /tmp/tmpu93ohzk_/-step-0-to-step-1000.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /tmp/tmpu93ohzk_/-step-0-to-step-1000.mp4
[38;5;1m✘ 'DummyVecEnv' object has no attribute 'video_recorder'[0m
[38;5;1m✘ We are unable to generate a replay of your agent, the package_to_hub
process continues[0m
[38;5;1m✘ Please open an issue at
https://github.com/huggingface/huggingface_sb3/issues[0m
[38;5;4mℹ Pushing repo Pucciland95/ppo-LunarLander-v3 to the Hugging Face
Hub[0m


Processing Files (4 / 4): 100%|██████████|  282kB /  282kB,  149kB/s  
New Data Upload: 100%|██████████|  109kB /  109kB,  109kB/s  


[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:
https://huggingface.co/Pucciland95/ppo-LunarLander-v3/tree/main/[0m


CommitInfo(commit_url='https://huggingface.co/Pucciland95/ppo-LunarLander-v3/commit/c7c678b9c912771347195671c06d64ccfbed56aa', commit_message='Uploaded PPO LunarLander-v3 trained agent', commit_description='', oid='c7c678b9c912771347195671c06d64ccfbed56aa', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Pucciland95/ppo-LunarLander-v3', endpoint='https://huggingface.co', repo_type='model', repo_id='Pucciland95/ppo-LunarLander-v3'), pr_revision=None, pr_num=None)

# Cloning and Evaluating the Model you just pushed

In [25]:
from huggingface_sb3 import load_from_hub

# Location of the uploaded archive on the Hugging Face Hub
repo_id = "Pucciland95/ppo-LunarLander-v3"
filename = "ChopChopMotherFucker.zip"

# Replacement objects passed to PPO.load through `custom_objects`
custom_objects = dict(
    learning_rate=0.0,
    lr_schedule=lambda _: 0.0,
    clip_range=lambda _: 0.0,
)

# Fetch the archive, then rebuild the agent from it (system info is printed on load)
checkpoint = load_from_hub(repo_id, filename)
model = PPO.load(
    checkpoint,
    custom_objects=custom_objects,
    print_system_info=True,
)

# Evaluate the downloaded agent on a Monitor-wrapped environment
eval_env = Monitor(gym.make("LunarLander-v3"))
mean_reward, std_reward = evaluate_policy(
    model,
    eval_env,
    n_eval_episodes=10,
    deterministic=True,
)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

== CURRENT SYSTEM INFO ==
- OS: Linux-6.6.87.2-microsoft-standard-WSL2-x86_64-with-glibc2.39 # 1 SMP PREEMPT_DYNAMIC Thu Jun  5 18:30:46 UTC 2025
- Python: 3.12.3
- Stable-Baselines3: 2.7.0
- PyTorch: 2.9.0+cu128
- GPU Enabled: False
- Numpy: 2.2.6
- Cloudpickle: 3.1.1
- Gymnasium: 1.2.1

== SAVED MODEL SYSTEM INFO ==
- OS: Linux-6.6.87.2-microsoft-standard-WSL2-x86_64-with-glibc2.39 # 1 SMP PREEMPT_DYNAMIC Thu Jun  5 18:30:46 UTC 2025
- Python: 3.12.3
- Stable-Baselines3: 2.7.0
- PyTorch: 2.9.0+cu128
- GPU Enabled: False
- Numpy: 2.2.6
- Cloudpickle: 3.1.1
- Gymnasium: 1.2.1

mean_reward=268.40 +/- 17.698211256933895
