In [1]:
from pyvirtualdisplay import Display

# Launch an off-screen (visible=0) 1400x900 virtual display.
# NOTE(review): presumably needed so later rendering / video capture works on a
# machine without a physical screen — confirm.
virtual_display = Display(size=(1400, 900), visible=0)
virtual_display.start()

<pyvirtualdisplay.display.Display at 0x7ff97015efd0>

In [2]:
from huggingface_sb3 import load_from_hub, package_to_hub, push_to_hub
from huggingface_hub import (
    notebook_login,
)  # To log to our Hugging Face account to be able to upload models to the Hub.

from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env

In [3]:
import gym

# Create one LunarLander environment and fetch its initial observation.
env = gym.make("LunarLander-v2")
observation = env.reset()

# Take 20 randomly sampled actions, printing each one as it is applied.
for _ in range(20):
    action = env.action_space.sample()
    print("Action taken:", action)
    observation, reward, done, info = env.step(action)

    # Nothing more to do while the episode is still running.
    if not done:
        continue

    # The episode finished (landed, crashed or timed out): start a new one.
    print("Environment is reset")
    observation = env.reset()

Action taken: 2
Action taken: 2
Action taken: 0
Action taken: 0
Action taken: 0
Action taken: 2
Action taken: 3
Action taken: 3
Action taken: 0
Action taken: 1
Action taken: 0
Action taken: 3
Action taken: 0
Action taken: 0
Action taken: 0
Action taken: 1
Action taken: 3
Action taken: 2
Action taken: 0
Action taken: 1


In [4]:
# Inspect the observation space of a freshly created (and reset) environment.
env = gym.make("LunarLander-v2")
env.reset()

observation_space = env.observation_space
print("_____OBSERVATION SPACE_____ \n")
print("Observation Space Shape", observation_space.shape)
# Draw one random point from the space to show what an observation looks like.
print("Sample observation", observation_space.sample())

_____OBSERVATION SPACE_____ 

Observation Space Shape (8,)
Sample observation [ 0.73703396 -1.1346145   1.0606571   0.8613746  -1.0922713   2.131148
  1.0111009  -1.1489828 ]


In [5]:
# Training environment: a vectorized LunarLander-v2 made of 8 environment copies.
env = make_vec_env("LunarLander-v2", n_envs=8)

# PPO hyperparameters, grouped in one place so they are easy to tune.
ppo_hyperparameters = dict(
    n_steps=1024,
    batch_size=128,
    n_epochs=4,
    gamma=0.999,
    gae_lambda=0.98,
    ent_coef=0.01,
    verbose=0,
)
model = PPO("MlpPolicy", env=env, **ppo_hyperparameters)

# Train for one million timesteps with a progress bar (verbose logging is off).
model.learn(total_timesteps=1_000_000, progress_bar=True)

# Persist the trained agent; `model_name` is reused later when uploading.
model_name = "ppo-LunarLander-v2"
model.save(model_name)


In [6]:
# Score the in-memory `model` from the training cell on a fresh, single
# (non-vectorized) environment.
# To evaluate a saved checkpoint instead, reload it first with
# PPO.load("ppo-LunarLander-v2", env=env).
eval_env = gym.make("LunarLander-v2")

# Run 10 episodes with deterministic actions; returns mean and std of the reward.
mean_reward, std_reward = evaluate_policy(
    model,
    eval_env,
    n_eval_episodes=10,
    deterministic=True,
)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")



mean_reward=247.70 +/- 17.669725435586663


In [7]:
import gym

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env

from huggingface_sb3 import package_to_hub
import huggingface_hub

# Imported here so this cell is self-contained: the token is read from the
# environment (or prompted for) instead of being written into the notebook.
import os
from getpass import getpass

# Upload settings for the trained agent.
env_id = "LunarLander-v2"
model_architecture = "PPO"
repo_id = "saltandpurple/LunarLanderv2-ppo"
commit_message = "lunar lander v2, ppo model v0.1"

# Single-environment vectorized wrapper handed to package_to_hub as `eval_env`.
eval_env = DummyVecEnv([lambda: gym.make(env_id)])

# SECURITY: never hardcode an access token in a notebook — it ends up in version
# control and in shared copies. Read it from the HF_TOKEN environment variable,
# or prompt for it without echoing when the variable is unset/empty.
# Any token that was previously committed here must be revoked on the Hub.
token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")

# Uses `model` and `model_name` defined in the training cell. Per the function's
# own log message, it saves, evaluates, records a video of the agent, creates a
# model card and pushes everything to the Hub. The returned value is left as the
# cell's last expression so the notebook displays it.
package_to_hub(
    token=token,
    model=model,
    model_name=model_name,
    model_architecture=model_architecture,
    env_id=env_id,
    eval_env=eval_env,
    repo_id=repo_id,
    commit_message=commit_message,
)

[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m
Saving video to /tmp/tmp32cdiv9o/-step-0-to-step-1000.mp4


ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

[38;5;4mℹ Pushing repo saltandpurple/LunarLanderv2-ppo to the Hugging Face
Hub[0m


ppo-LunarLander-v2.zip:   0%|          | 0.00/147k [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

policy.pth:   0%|          | 0.00/43.4k [00:00<?, ?B/s]

policy.optimizer.pth:   0%|          | 0.00/87.9k [00:00<?, ?B/s]

[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:
https://huggingface.co/saltandpurple/LunarLanderv2-ppo/tree/main/[0m


'https://huggingface.co/saltandpurple/LunarLanderv2-ppo/tree/main/'