In [1]:
import robosuite as suite
from robosuite.wrappers.gym_wrapper import GymWrapper
import gymnasium as gym
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.monitor import Monitor
import time




In [2]:
import os

# Folder created under the directory the notebook is running from; the
# Monitor wrapper further down is pointed at the same relative path.
cwd = os.getcwd()
new_folder = os.path.join(cwd, "tmp/gym")

# exist_ok=True lets this cell be re-run without raising when the folder
# is already present.
os.makedirs(name=new_folder, exist_ok=True)

In [3]:
# Build the environment in three explicit steps: simulator settings,
# raw robosuite task, then the Gym and Monitor wrappers.
lift_env_kwargs = dict(
    env_name="Lift",                    # try with other tasks like "Stack" and "Door"
    robots="UR5e",                      # try with other robots like "Sawyer" and "Jaco"
    has_renderer=True,                  # on-screen renderer enabled
    has_offscreen_renderer=True,        # off-screen renderer enabled as well
    use_object_obs=True,                # object observations are switched ON in the simulator
    use_camera_obs=True,                # camera images are switched ON as observations
    camera_names="robot0_eye_in_hand",  # images come from the "robot0_eye_in_hand" camera
    camera_heights=84,                  # image height
    camera_widths=84,                   # image width
    reward_shaping=True,                # use a dense reward signal for learning
    horizon=500,                        # episode length in control steps
    control_freq=20,                    # control should happen fast enough so that simulation looks smooth
)
robosuite_env = suite.make(**lift_env_kwargs)

# Only the listed observation key is handed to GymWrapper, so the agent is
# given the eye-in-hand image (see the observation space printed below).
env = GymWrapper(robosuite_env, ['robot0_eye_in_hand_image'])

# Monitor wraps the env for episode logging; "tmp/gym" is passed as its
# log location (the folder is created in the previous cell).
env = Monitor(env, "tmp/gym")

In [4]:
# Display the observation space of the wrapped environment; the output
# recorded below is a Dict holding a single 84x84x3 uint8 image entry.
env.observation_space

Dict('robot0_eye_in_hand_image': Box(0, 255, (84, 84, 3), uint8))

In [5]:
from stable_baselines3 import PPO

# The env exposes a Dict observation space, so PPO is given the
# "MultiInputPolicy" policy type (stable-baselines3 rejects CnnPolicy for
# dict observations, as the recorded error message states).
model = PPO(policy="MultiInputPolicy", env=env, verbose=1)

# Short run of 2048 timesteps, printing training stats every 5 log intervals.
model.learn(total_timesteps=2048, log_interval=5, progress_bar=True)

Using cpu device
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


ValueError: You must use `MultiInputPolicy` when working with dict observation space, not CnnPolicy

In [5]:
# Roll out the current policy for at most 500 steps, rendering every step.
#
# env.step() returns five values (obs, reward, terminated, truncated, info).
# The two end-of-episode flags are unpacked under separate names: binding
# both to `done` would silently discard `terminated` and keep only
# `truncated`, so a genuine termination would never stop the loop.
obs, reset_info = env.reset()
try:
    for step in range(500):
        action, _states = model.predict(obs)
        obs, reward, terminated, truncated, info = env.step(action)
        env.render()
        time.sleep(0.2)  # slow playback down so the rendering can be followed by eye
        done = terminated or truncated
        if done:
            break
finally:
    # Release the environment even when the loop is interrupted
    # (e.g. by a KeyboardInterrupt from the notebook's stop button).
    env.close()

KeyboardInterrupt: 

In [3]:
from stable_baselines3 import PPO

# Resume from the checkpoint saved as 'trained_model' and attach it to the
# current environment.
model = PPO.load(path='trained_model', env=env)

# Train for a further 100000 timesteps, printing stats every 10 log intervals.
model.learn(total_timesteps=100000, log_interval=10, progress_bar=True)

# Write the updated weights back to the same checkpoint name.
model.save(path='trained_model')


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


Output()

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 74.8       |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 10         |
|    time_elapsed         | 227        |
|    total_timesteps      | 20480      |
| train/                  |            |
|    approx_kl            | 0.49315557 |
|    clip_fraction        | 0.723      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.56      |
|    explained_variance   | 0.946      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0686    |
|    n_updates            | 1560       |
|    policy_gradient_loss | -0.0664    |
|    std                  | 0.401      |
|    value_loss           | 0.785      |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean  

In [6]:
import torch
# Report whether PyTorch can see a CUDA device from this kernel.
torch.cuda.is_available()

True