In [1]:
import robosuite as suite
from robosuite.wrappers.gym_wrapper import GymWrapper
import gymnasium as gym
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.monitor import Monitor
import time
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3.common.callbacks import BaseCallback
import numpy as np



In [2]:
import os
# Relative directory for everything this run writes to disk: it is handed to
# Monitor and to the best-model callback, and the replay video is saved under it.
filename = 'tmp/gym/twoarm_ppo'
cwd = os.getcwd()
new_folder = os.path.join(cwd, filename)
# Create the directory up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(new_folder, exist_ok=True)

In [3]:
# Build the training environment: a robosuite task wrapped for gym-style use,
# then wrapped in Monitor so episode statistics are logged under `filename`.
env = GymWrapper(suite.make(
    env_name="TwoArmPegInHole",             # task to train on
    robots=["UR5e","UR5e"],                 # one entry per robot (robot0 and robot1 in the observation keys)
    has_renderer=True,                      # on-screen viewer; env.render() is called in the rollout cell
    has_offscreen_renderer=True,            # presumably required for camera observations -- TODO confirm
    use_object_obs=False,                   # don't provide object observations to agent
    use_camera_obs=True,
    camera_names="robot0_eye_in_hand",      # eye-in-hand camera of robot0; appears as 'robot0_eye_in_hand_image'
    camera_heights=84,                      # image height
    camera_widths=84,                       # image width
    reward_shaping=True,                    # use a dense reward signal for learning
    horizon = 500,                          # episode length in steps (ep_len_mean is 500 in the training log)
    control_freq=20,                        # control should happen fast enough so that simulation looks smooth
))
# Monitor records per-episode results under `filename`; the training callback
# reads them back from that same location via load_results.
env = Monitor(env, filename)

In [4]:
# Display the observation space of the wrapped environment (a Dict of image and
# proprioceptive entries, as shown in the output below).
env.observation_space

Dict('robot0_eef_pos': Box(-1.0, 1.0, (3,), float32), 'robot0_eef_quat': Box(-1.0, 1.0, (4,), float32), 'robot0_eye_in_hand_image': Box(0, 255, (84, 84, 3), uint8), 'robot0_joint_pos_cos': Box(-1.0, 1.0, (6,), float32), 'robot0_joint_pos_sin': Box(-1.0, 1.0, (6,), float32), 'robot0_joint_vel': Box(-1.0, 1.0, (6,), float32), 'robot0_proprio-state': Box(-1.0, 1.0, (25,), float32), 'robot1_eef_pos': Box(-1.0, 1.0, (3,), float32), 'robot1_eef_quat': Box(-1.0, 1.0, (4,), float32), 'robot1_joint_pos_cos': Box(-1.0, 1.0, (6,), float32), 'robot1_joint_pos_sin': Box(-1.0, 1.0, (6,), float32), 'robot1_joint_vel': Box(-1.0, 1.0, (6,), float32), 'robot1_proprio-state': Box(-1.0, 1.0, (25,), float32))

In [5]:
class SaveOnBestTrainingRewardCallback(BaseCallback):
    """
    Checkpoint the model whenever the mean training reward improves.

    Every ``check_freq`` calls, the episode rewards logged by ``Monitor`` are read
    back from ``log_dir``. When the mean over the most recent episodes (at most
    100) beats the best mean seen so far, the model is saved. In practice,
    ``EvalCallback`` is the recommended tool for this.

    :param check_freq: (int) Number of callback calls between two checks.
    :param log_dir: (str) Folder holding the file written by the ``Monitor``
      wrapper; the model is saved inside it under the name ``best_model``.
    :param verbose: (int) Values above 0 print progress messages.
    """

    def __init__(self, check_freq: int, log_dir: str, verbose=1):
        super().__init__(verbose)
        self.log_dir = log_dir
        self.check_freq = check_freq
        self.best_mean_reward = -np.inf
        self.save_path = os.path.join(log_dir, "best_model")

    def _init_callback(self) -> None:
        # Nothing to prepare when no save location is configured.
        if self.save_path is None:
            return
        os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        # Only evaluate on every ``check_freq``-th call; always keep training.
        if self.n_calls % self.check_freq != 0:
            return True

        # Episode results logged so far, indexed by timesteps.
        timesteps, episode_rewards = ts2xy(load_results(self.log_dir), "timesteps")
        if len(timesteps) == 0:
            return True

        # Average over (at most) the 100 most recent episodes.
        mean_reward = np.mean(episode_rewards[-100:])
        chatty = self.verbose > 0
        if chatty:
            print(f"Num timesteps: {self.num_timesteps}")
            print(
                f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}"
            )

        # A strictly better mean becomes the new reference and triggers a save.
        if mean_reward > self.best_mean_reward:
            self.best_mean_reward = mean_reward
            if chatty:
                print(f"Saving new best model to {self.save_path}.zip")
            self.model.save(self.save_path)

        return True

In [6]:
from stable_baselines3 import PPO

# Hidden-layer sizes handed to the policy as `net_arch`.
policy_kwargs = dict(
    net_arch=[256, 256]
)
# "MultiInputPolicy" is used because the observation space is a Dict
# (image plus proprioceptive entries).
model = PPO("MultiInputPolicy", env, verbose=1, batch_size=256, policy_kwargs=policy_kwargs)
# Every 1000 callback calls, compare the recent mean episode reward with the best
# so far and save the model under `filename` when it improves.
callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=filename)
# total_timesteps is documented as an int, so pass an integer literal rather
# than the float 5e5.
model.learn(total_timesteps=500_000, progress_bar=True, log_interval=10, callback=callback)

Using cuda device
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


Output()

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 500         |
|    ep_rew_mean          | 222         |
| time/                   |             |
|    fps                  | 78          |
|    iterations           | 10          |
|    time_elapsed         | 261         |
|    total_timesteps      | 20480       |
| train/                  |             |
|    approx_kl            | 0.011395536 |
|    clip_fraction        | 0.159       |
|    clip_range           | 0.2         |
|    entropy_loss         | -17         |
|    explained_variance   | 0.0136      |
|    learning_rate        | 0.0003      |
|    loss                 | 1.38        |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.00952    |
|    std                  | 1           |
|    value_loss           | 7.74        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 500         |
|    ep_rew_mean          | 227         |
| time/                   |             |
|    fps                  | 78          |
|    iterations           | 20          |
|    time_elapsed         | 520         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.008254632 |
|    clip_fraction        | 0.0898      |
|    clip_range           | 0.2         |
|    entropy_loss         | -17         |
|    explained_variance   | 0.302       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.668       |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.0101     |
|    std                  | 1           |
|    value_loss           | 6.77        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 500         |
|    ep_rew_mean          | 226         |
| time/                   |             |
|    fps                  | 79          |
|    iterations           | 30          |
|    time_elapsed         | 777         |
|    total_timesteps      | 61440       |
| train/                  |             |
|    approx_kl            | 0.012809235 |
|    clip_fraction        | 0.126       |
|    clip_range           | 0.2         |
|    entropy_loss         | -17         |
|    explained_variance   | 0.658       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.369       |
|    n_updates            | 290         |
|    policy_gradient_loss | -0.0112     |
|    std                  | 1           |
|    value_loss           | 6.47        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 231        |
| time/                   |            |
|    fps                  | 78         |
|    iterations           | 40         |
|    time_elapsed         | 1047       |
|    total_timesteps      | 81920      |
| train/                  |            |
|    approx_kl            | 0.02132714 |
|    clip_fraction        | 0.235      |
|    clip_range           | 0.2        |
|    entropy_loss         | -17        |
|    explained_variance   | 0.736      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.02       |
|    n_updates            | 390        |
|    policy_gradient_loss | -0.0179    |
|    std                  | 0.997      |
|    value_loss           | 8.67       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 241        |
| time/                   |            |
|    fps                  | 77         |
|    iterations           | 50         |
|    time_elapsed         | 1325       |
|    total_timesteps      | 102400     |
| train/                  |            |
|    approx_kl            | 0.02624504 |
|    clip_fraction        | 0.284      |
|    clip_range           | 0.2        |
|    entropy_loss         | -17        |
|    explained_variance   | 0.738      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.366      |
|    n_updates            | 490        |
|    policy_gradient_loss | -0.0174    |
|    std                  | 0.998      |
|    value_loss           | 5.26       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 500         |
|    ep_rew_mean          | 249         |
| time/                   |             |
|    fps                  | 76          |
|    iterations           | 60          |
|    time_elapsed         | 1603        |
|    total_timesteps      | 122880      |
| train/                  |             |
|    approx_kl            | 0.032504156 |
|    clip_fraction        | 0.402       |
|    clip_range           | 0.2         |
|    entropy_loss         | -17         |
|    explained_variance   | 0.796       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.827       |
|    n_updates            | 590         |
|    policy_gradient_loss | -0.00782    |
|    std                  | 0.998       |
|    value_loss           | 4.8         |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 250        |
| time/                   |            |
|    fps                  | 76         |
|    iterations           | 70         |
|    time_elapsed         | 1880       |
|    total_timesteps      | 143360     |
| train/                  |            |
|    approx_kl            | 0.03863416 |
|    clip_fraction        | 0.361      |
|    clip_range           | 0.2        |
|    entropy_loss         | -17.1      |
|    explained_variance   | 0.936      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.233      |
|    n_updates            | 690        |
|    policy_gradient_loss | -0.0168    |
|    std                  | 1.01       |
|    value_loss           | 2.34       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 500         |
|    ep_rew_mean          | 259         |
| time/                   |             |
|    fps                  | 76          |
|    iterations           | 80          |
|    time_elapsed         | 2148        |
|    total_timesteps      | 163840      |
| train/                  |             |
|    approx_kl            | 0.052617386 |
|    clip_fraction        | 0.448       |
|    clip_range           | 0.2         |
|    entropy_loss         | -17.1       |
|    explained_variance   | 0.729       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.255       |
|    n_updates            | 790         |
|    policy_gradient_loss | -0.00826    |
|    std                  | 1           |
|    value_loss           | 9.03        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 500         |
|    ep_rew_mean          | 270         |
| time/                   |             |
|    fps                  | 76          |
|    iterations           | 90          |
|    time_elapsed         | 2423        |
|    total_timesteps      | 184320      |
| train/                  |             |
|    approx_kl            | 0.043158695 |
|    clip_fraction        | 0.406       |
|    clip_range           | 0.2         |
|    entropy_loss         | -17         |
|    explained_variance   | 0.879       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.101       |
|    n_updates            | 890         |
|    policy_gradient_loss | -0.0151     |
|    std                  | 0.996       |
|    value_loss           | 3.11        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 500         |
|    ep_rew_mean          | 270         |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 100         |
|    time_elapsed         | 2696        |
|    total_timesteps      | 204800      |
| train/                  |             |
|    approx_kl            | 0.039029747 |
|    clip_fraction        | 0.408       |
|    clip_range           | 0.2         |
|    entropy_loss         | -17         |
|    explained_variance   | 0.726       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.452       |
|    n_updates            | 990         |
|    policy_gradient_loss | -0.00632    |
|    std                  | 1           |
|    value_loss           | 4.98        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 272        |
| time/                   |            |
|    fps                  | 75         |
|    iterations           | 110        |
|    time_elapsed         | 2970       |
|    total_timesteps      | 225280     |
| train/                  |            |
|    approx_kl            | 0.04693403 |
|    clip_fraction        | 0.467      |
|    clip_range           | 0.2        |
|    entropy_loss         | -17        |
|    explained_variance   | 0.956      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0706     |
|    n_updates            | 1090       |
|    policy_gradient_loss | -0.0118    |
|    std                  | 1          |
|    value_loss           | 2.65       |
----------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 500       |
|    ep_rew_mean          | 293       |
| time/                   |           |
|    fps                  | 75        |
|    iterations           | 120       |
|    time_elapsed         | 3259      |
|    total_timesteps      | 245760    |
| train/                  |           |
|    approx_kl            | 0.0711153 |
|    clip_fraction        | 0.48      |
|    clip_range           | 0.2       |
|    entropy_loss         | -17       |
|    explained_variance   | 0.773     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.145     |
|    n_updates            | 1190      |
|    policy_gradient_loss | -0.00281  |
|    std                  | 1         |
|    value_loss           | 5.38      |
---------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 500         |
|    ep_rew_mean          | 300         |
| time/                   |             |
|    fps                  | 74          |
|    iterations           | 130         |
|    time_elapsed         | 3576        |
|    total_timesteps      | 266240      |
| train/                  |             |
|    approx_kl            | 0.054752585 |
|    clip_fraction        | 0.397       |
|    clip_range           | 0.2         |
|    entropy_loss         | -17         |
|    explained_variance   | 0.441       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.313       |
|    n_updates            | 1290        |
|    policy_gradient_loss | -0.00997    |
|    std                  | 0.998       |
|    value_loss           | 13.1        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 500         |
|    ep_rew_mean          | 307         |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 140         |
|    time_elapsed         | 3893        |
|    total_timesteps      | 286720      |
| train/                  |             |
|    approx_kl            | 0.056276172 |
|    clip_fraction        | 0.477       |
|    clip_range           | 0.2         |
|    entropy_loss         | -17         |
|    explained_variance   | 0.277       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.342       |
|    n_updates            | 1390        |
|    policy_gradient_loss | 0.00683     |
|    std                  | 0.996       |
|    value_loss           | 10.7        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 302        |
| time/                   |            |
|    fps                  | 73         |
|    iterations           | 150        |
|    time_elapsed         | 4203       |
|    total_timesteps      | 307200     |
| train/                  |            |
|    approx_kl            | 0.03956879 |
|    clip_fraction        | 0.404      |
|    clip_range           | 0.2        |
|    entropy_loss         | -17        |
|    explained_variance   | 0.898      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.469      |
|    n_updates            | 1490       |
|    policy_gradient_loss | -0.000723  |
|    std                  | 0.996      |
|    value_loss           | 4.03       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 300        |
| time/                   |            |
|    fps                  | 72         |
|    iterations           | 160        |
|    time_elapsed         | 4515       |
|    total_timesteps      | 327680     |
| train/                  |            |
|    approx_kl            | 0.06621088 |
|    clip_fraction        | 0.43       |
|    clip_range           | 0.2        |
|    entropy_loss         | -17        |
|    explained_variance   | 0.872      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.116      |
|    n_updates            | 1590       |
|    policy_gradient_loss | -0.0183    |
|    std                  | 1          |
|    value_loss           | 3.09       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 296        |
| time/                   |            |
|    fps                  | 72         |
|    iterations           | 170        |
|    time_elapsed         | 4817       |
|    total_timesteps      | 348160     |
| train/                  |            |
|    approx_kl            | 0.03860174 |
|    clip_fraction        | 0.326      |
|    clip_range           | 0.2        |
|    entropy_loss         | -17.1      |
|    explained_variance   | 0.922      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.373      |
|    n_updates            | 1690       |
|    policy_gradient_loss | -0.0164    |
|    std                  | 1.01       |
|    value_loss           | 4          |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 500         |
|    ep_rew_mean          | 298         |
| time/                   |             |
|    fps                  | 72          |
|    iterations           | 180         |
|    time_elapsed         | 5115        |
|    total_timesteps      | 368640      |
| train/                  |             |
|    approx_kl            | 0.034937404 |
|    clip_fraction        | 0.363       |
|    clip_range           | 0.2         |
|    entropy_loss         | -17.1       |
|    explained_variance   | 0.779       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.441       |
|    n_updates            | 1790        |
|    policy_gradient_loss | -0.00524    |
|    std                  | 1           |
|    value_loss           | 9.24        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 301        |
| time/                   |            |
|    fps                  | 71         |
|    iterations           | 190        |
|    time_elapsed         | 5412       |
|    total_timesteps      | 389120     |
| train/                  |            |
|    approx_kl            | 0.06125351 |
|    clip_fraction        | 0.421      |
|    clip_range           | 0.2        |
|    entropy_loss         | -17.1      |
|    explained_variance   | 0.915      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.235      |
|    n_updates            | 1890       |
|    policy_gradient_loss | -0.0125    |
|    std                  | 1.01       |
|    value_loss           | 3.69       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 500         |
|    ep_rew_mean          | 303         |
| time/                   |             |
|    fps                  | 71          |
|    iterations           | 200         |
|    time_elapsed         | 5713        |
|    total_timesteps      | 409600      |
| train/                  |             |
|    approx_kl            | 0.049124226 |
|    clip_fraction        | 0.454       |
|    clip_range           | 0.2         |
|    entropy_loss         | -17.1       |
|    explained_variance   | 0.649       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.211       |
|    n_updates            | 1990        |
|    policy_gradient_loss | -0.00323    |
|    std                  | 1.01        |
|    value_loss           | 7.36        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 306        |
| time/                   |            |
|    fps                  | 71         |
|    iterations           | 210        |
|    time_elapsed         | 6017       |
|    total_timesteps      | 430080     |
| train/                  |            |
|    approx_kl            | 0.08807609 |
|    clip_fraction        | 0.5        |
|    clip_range           | 0.2        |
|    entropy_loss         | -17.1      |
|    explained_variance   | 0.24       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.763      |
|    n_updates            | 2090       |
|    policy_gradient_loss | -0.00437   |
|    std                  | 1.01       |
|    value_loss           | 21.9       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 309        |
| time/                   |            |
|    fps                  | 71         |
|    iterations           | 220        |
|    time_elapsed         | 6314       |
|    total_timesteps      | 450560     |
| train/                  |            |
|    approx_kl            | 0.06684698 |
|    clip_fraction        | 0.44       |
|    clip_range           | 0.2        |
|    entropy_loss         | -17.1      |
|    explained_variance   | 0.774      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.408      |
|    n_updates            | 2190       |
|    policy_gradient_loss | -0.00204   |
|    std                  | 1.01       |
|    value_loss           | 11.8       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 304        |
| time/                   |            |
|    fps                  | 71         |
|    iterations           | 230        |
|    time_elapsed         | 6608       |
|    total_timesteps      | 471040     |
| train/                  |            |
|    approx_kl            | 0.08941426 |
|    clip_fraction        | 0.496      |
|    clip_range           | 0.2        |
|    entropy_loss         | -17.1      |
|    explained_variance   | 0.604      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.208      |
|    n_updates            | 2290       |
|    policy_gradient_loss | -0.000954  |
|    std                  | 1          |
|    value_loss           | 7.12       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 500         |
|    ep_rew_mean          | 290         |
| time/                   |             |
|    fps                  | 71          |
|    iterations           | 240         |
|    time_elapsed         | 6893        |
|    total_timesteps      | 491520      |
| train/                  |             |
|    approx_kl            | 0.054978274 |
|    clip_fraction        | 0.445       |
|    clip_range           | 0.2         |
|    entropy_loss         | -17.1       |
|    explained_variance   | 0.959       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.154       |
|    n_updates            | 2390        |
|    policy_gradient_loss | -0.00932    |
|    std                  | 1.01        |
|    value_loss           | 3.01        |
-----------------------------------------


<stable_baselines3.ppo.ppo.PPO at 0x7fc52141f550>

In [7]:
# Roll out the trained policy for at most 500 steps, recording every action so
# the recorded sequence can be replayed later in the video environment.
obs = env.reset()[0]
action_list = []
try:
    for _step in range(500):
        action, _states = model.predict(obs)
        action_list.append(action)
        # step() returns separate `terminated` and `truncated` flags. Unpacking both
        # into a single name keeps only the second one, so combine them explicitly.
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        env.render()
        if done:
            break
finally:
    # Release the environment even if the rollout raises part-way through.
    env.close()

QObject::moveToThread: Current thread (0x149c2140) is not the object's thread (0x17b0c690).
Cannot move to target thread (0x149c2140)

QObject::moveToThread: Current thread (0x149c2140) is not the object's thread (0x17b0c690).
Cannot move to target thread (0x149c2140)

QObject::moveToThread: Current thread (0x149c2140) is not the object's thread (0x17b0c690).
Cannot move to target thread (0x149c2140)

QObject::moveToThread: Current thread (0x149c2140) is not the object's thread (0x17b0c690).
Cannot move to target thread (0x149c2140)

QObject::moveToThread: Current thread (0x149c2140) is not the object's thread (0x17b0c690).
Cannot move to target thread (0x149c2140)

QObject::moveToThread: Current thread (0x149c2140) is not the object's thread (0x17b0c690).
Cannot move to target thread (0x149c2140)

QObject::moveToThread: Current thread (0x149c2140) is not the object's thread (0x17b0c690).
Cannot move to target thread (0x149c2140)

QObject::moveToThread: Current thread (0x149c2140) is n

In [12]:
import robosuite.macros as macros
# Set the image convention before building the environment whose camera frames
# are written to video (presumably so frames come out upright -- TODO confirm).
macros.IMAGE_CONVENTION = "opencv"
# Rebinds `env` to a second environment used only for recording: no on-screen
# viewer, and a 512x512 "frontview" camera instead of the 84x84 training camera.
env = GymWrapper(suite.make(
    env_name="TwoArmPegInHole",             # same task as the training environment
    robots=["UR5e", "UR5e"],                # same robots as the training environment
    has_renderer=False,                     # no on-screen viewer; frames come from the camera observation
    ignore_done = True,                     # presumably stops the env from signalling done at the horizon -- TODO confirm
    has_offscreen_renderer=True,
    use_object_obs=True,                    # provide object observations (unlike the training environment)
    use_camera_obs=True,
    camera_names="frontview",               # frames are read back as obs["frontview_image"]
    camera_heights=512,                     # image height
    camera_widths=512,                      # image width
    reward_shaping=True,                    # use a dense reward signal for learning
    horizon = 500,
    control_freq=20,                        # control should happen fast enough so that simulation looks smooth
))

In [13]:
import imageio

# Replay the recorded actions in the recording environment and write one
# "frontview" frame per step to an mp4 (fps=20 matches control_freq=20).
# NOTE(review): the actions are replayed open-loop after a fresh reset; unless the
# initial state matches the original rollout, the video may differ from it.
writer = imageio.get_writer(f'{filename}/video.mp4', fps=20)
try:
    obs = env.reset()[0]
    for i, recorded_action in enumerate(action_list):
        # step() returns separate `terminated` and `truncated` flags. Unpacking both
        # into a single name keeps only the second one, so combine them explicitly.
        obs, reward, terminated, truncated, info = env.step(recorded_action)
        done = terminated or truncated
        frame = obs["frontview_image"]
        writer.append_data(frame)
        print("Saving frame #{}".format(i))
        if done:
            break
finally:
    # Always finalize the video file and release the environment, even when a
    # step or a frame write fails part-way through.
    writer.close()
    env.close()

Saving frame #0
Saving frame #1
Saving frame #2
Saving frame #3
Saving frame #4
Saving frame #5
Saving frame #6
Saving frame #7
Saving frame #8
Saving frame #9
Saving frame #10
Saving frame #11
Saving frame #12
Saving frame #13
Saving frame #14
Saving frame #15
Saving frame #16
Saving frame #17
Saving frame #18
Saving frame #19
Saving frame #20
Saving frame #21
Saving frame #22
Saving frame #23
Saving frame #24
Saving frame #25
Saving frame #26
Saving frame #27
Saving frame #28
Saving frame #29
Saving frame #30
Saving frame #31
Saving frame #32
Saving frame #33
Saving frame #34
Saving frame #35
Saving frame #36
Saving frame #37
Saving frame #38
Saving frame #39
Saving frame #40
Saving frame #41
Saving frame #42
Saving frame #43
Saving frame #44
Saving frame #45
Saving frame #46
Saving frame #47
Saving frame #48
Saving frame #49
Saving frame #50
Saving frame #51
Saving frame #52
Saving frame #53
Saving frame #54
Saving frame #55
Saving frame #56
Saving frame #57
Saving frame #58
Saving 

Saving frame #478
Saving frame #479
Saving frame #480
Saving frame #481
Saving frame #482
Saving frame #483
Saving frame #484
Saving frame #485
Saving frame #486
Saving frame #487
Saving frame #488
Saving frame #489
Saving frame #490
Saving frame #491
Saving frame #492
Saving frame #493
Saving frame #494
Saving frame #495
Saving frame #496
Saving frame #497
Saving frame #498
Saving frame #499
