In [1]:
import os

import gymnasium as gym
import panda_gym


## Create the environment

In [2]:
env_id = "PandaReachDense-v3"

# Create the env
env = gym.make(env_id)

# Get the state space and action space
s_size = env.observation_space.shape
a_size = env.action_space


pybullet build time: Nov 28 2023 23:45:17


argv[0]=--background_color_red=0.8745098114013672
argv[1]=--background_color_green=0.21176470816135406
argv[2]=--background_color_blue=0.1764705926179886


In [3]:
print("_____OBSERVATION SPACE_____ \n")
print("The State Space is: ", s_size)
print("Sample observation", env.observation_space.sample()) # Get a random observation


_____OBSERVATION SPACE_____ 

The State Space is:  None
Sample observation OrderedDict([('achieved_goal', array([ 6.3085732, -6.711124 , -9.177889 ], dtype=float32)), ('desired_goal', array([-3.107425 ,  7.0993648, -9.723987 ], dtype=float32)), ('observation', array([ 4.1328325 ,  2.8865662 , -8.111492  ,  9.377049  , -0.45503455,
       -2.5548368 ], dtype=float32))])


In [4]:
print("\n _____ACTION SPACE_____ \n")
print("The Action Space is: ", a_size)
print("Action Space Sample", env.action_space.sample()) # Take a random action



 _____ACTION SPACE_____ 

The Action Space is:  Box(-1.0, 1.0, (3,), float32)
Action Space Sample [-0.7594141   0.47557107  0.6061477 ]


## Normalize observation and  rewards

In [5]:
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecNormalize

env = make_vec_env(env_id, n_envs=4)

env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)


argv[0]=--background_color_red=0.8745098114013672
argv[1]=--background_color_green=0.21176470816135406
argv[2]=--background_color_blue=0.1764705926179886
argv[0]=--background_color_red=0.8745098114013672
argv[1]=--background_color_green=0.21176470816135406
argv[2]=--background_color_blue=0.1764705926179886
argv[0]=--background_color_red=0.8745098114013672
argv[1]=--background_color_green=0.21176470816135406
argv[2]=--background_color_blue=0.1764705926179886
argv[0]=--background_color_red=0.8745098114013672
argv[1]=--background_color_green=0.21176470816135406
argv[2]=--background_color_blue=0.1764705926179886


## Create and train the Model

In [6]:
from stable_baselines3 import A2C

model = A2C(policy = "MultiInputPolicy",
            env = env,
            verbose=1)


Using cuda device


In [7]:
model.learn(1_000_000)


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 45.2     |
|    ep_rew_mean        | -12.6    |
|    success_rate       | 0.122    |
| time/                 |          |
|    fps                | 574      |
|    iterations         | 100      |
|    time_elapsed       | 3        |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -4.31    |
|    explained_variance | 0.838    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.873   |
|    std                | 1.02     |
|    value_loss         | 0.18     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 46.1     |
|    ep_rew_mean        | -15.2    |
|    success_rate       | 0.107    |
| time/                 |          |
|    fps                | 670      |
|    iterations         | 200      |
|

<stable_baselines3.a2c.a2c.A2C at 0x7e09b471c490>

In [8]:
# Save the model and  VecNormalize statistics when saving the agent
model.save("a2c-PandaReachDense-v3")
env.save("vec_normalize.pkl")


## Evaluate the model

In [9]:
from stable_baselines3.common.vec_env import DummyVecEnv

from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

# Load the saved statistics
eval_env = DummyVecEnv([lambda: gym.make("PandaReachDense-v3")])
eval_env = VecNormalize.load("vec_normalize.pkl", eval_env)

# We need to override the render_mode
eval_env.render_mode = "rgb_array"

#  do not update them at test time
eval_env.training = False
# reward normalization is not needed at test time
eval_env.norm_reward = False

# Load the agent
model = A2C.load("a2c-PandaReachDense-v3")



argv[0]=--background_color_red=0.8745098114013672
argv[1]=--background_color_green=0.21176470816135406
argv[2]=--background_color_blue=0.1764705926179886


In [10]:
from stable_baselines3.common.evaluation import evaluate_policy

mean_reward, std_reward = evaluate_policy(model, eval_env)

print(f"Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}")


Mean reward = -0.28 +/- 0.11




## Publish trained model on  the Hub

In [11]:
from huggingface_hub import notebook_login
from huggingface_sb3 import load_from_hub, package_to_hub


In [12]:
notebook_login()
!git config --global credential.helper store


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [13]:
from huggingface_sb3 import package_to_hub

package_to_hub(
    model=model,
    model_name=f"a2c-{env_id}",
    model_architecture="A2C",
    env_id=env_id,
    eval_env=eval_env,
    repo_id=f"ra9hu/a2c-{env_id}", # Change the username
    commit_message="Initial commit",
)


[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m




Saving video to /tmp/tmp1f_o4zel/-step-0-to-step-1000.mp4
Moviepy - Building video /tmp/tmp1f_o4zel/-step-0-to-step-1000.mp4.
Moviepy - Writing video /tmp/tmp1f_o4zel/-step-0-to-step-1000.mp4



ffmpeg version 9c33b2f Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 9.3.0 (crosstool-NG 1.24.0.133_b0863d8_dirty)
  configuration: --prefix=/home/conda/feedstock_root/build_artifacts/ffmpeg_1627813612080/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1627813612080/_build_env/bin/x86_64-conda-linux-gnu-cc --disable-doc --disable-openssl --enable-avresample --enable-gnutls --enable-gpl --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-libx264 --enable-pic --enable-pthreads --enable-shared --enable-static --enable-version3 --enable-zlib --enable-libmp3lame --pkg-config=/home/conda/feedstock_root/build_artifacts/ffmpeg_1627813612080/_build_env/bin/pkg-config
  libavutil      56. 51.100 / 56. 51.100
  libavcodec     58. 91.100 / 58. 91.100
  libavfor

Moviepy - Done !
Moviepy - video ready /tmp/tmp1f_o4zel/-step-0-to-step-1000.mp4


frame= 1001 fps=0.0 q=-1.0 Lsize=     771kB time=00:00:39.92 bitrate= 158.3kbits/s speed=52.1x    
video:759kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 1.571664%
[libx264 @ 0x6552055086c0] frame I:5     Avg QP:17.14  size:  6929
[libx264 @ 0x6552055086c0] frame P:307   Avg QP:24.04  size:   842
[libx264 @ 0x6552055086c0] frame B:689   Avg QP:29.47  size:   702
[libx264 @ 0x6552055086c0] consecutive B-frames:  2.1% 10.6% 23.4% 63.9%
[libx264 @ 0x6552055086c0] mb I  I16..4: 19.5% 64.1% 16.4%
[libx264 @ 0x6552055086c0] mb P  I16..4:  0.2%  0.5%  0.5%  P16..4:  2.1%  1.2%  0.7%  0.0%  0.0%    skip:94.8%
[libx264 @ 0x6552055086c0] mb B  I16..4:  0.1%  0.1%  0.2%  B16..8:  3.5%  1.9%  0.7%  direct: 0.2%  skip:93.3%  L0:54.0% L1:43.7% BI: 2.3%
[libx264 @ 0x6552055086c0] 8x8 transform intra:51.3% inter:8.6%
[libx264 @ 0x6552055086c0] coded y,uvDC,uvAC intra: 21.4% 4.6% 2.8% inter: 1.0% 0.0% 0.0%
[libx264 @ 0x6552055086c0] i16 v,h,dc,p: 66% 12% 21%  1%
[libx2

[38;5;4mℹ Pushing repo ra9hu/a2c-PandaReachDense-v3 to the Hugging Face Hub[0m


a2c-PandaReachDense-v3.zip:   0%|          | 0.00/111k [00:00<?, ?B/s]

pytorch_variables.pth:   0%|          | 0.00/864 [00:00<?, ?B/s]

policy.pth:   0%|          | 0.00/46.4k [00:00<?, ?B/s]

policy.optimizer.pth:   0%|          | 0.00/45.2k [00:00<?, ?B/s]

Upload 5 LFS files:   0%|          | 0/5 [00:00<?, ?it/s]

vec_normalize.pkl:   0%|          | 0.00/2.65k [00:00<?, ?B/s]

[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:
https://huggingface.co/ra9hu/a2c-PandaReachDense-v3/tree/main/[0m


CommitInfo(commit_url='https://huggingface.co/ra9hu/a2c-PandaReachDense-v3/commit/cb62f4e2cfcae0650666280ea1002bdf46cdac65', commit_message='Initial commit', commit_description='', oid='cb62f4e2cfcae0650666280ea1002bdf46cdac65', pr_url=None, pr_revision=None, pr_num=None)