In [1]:
import robosuite as suite
from robosuite.wrappers.gym_wrapper import GymWrapper
import gymnasium as gym
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.monitor import Monitor
import time
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3.common.callbacks import BaseCallback
import numpy as np




In [2]:
import os

# Relative path of this run's directory; later cells reuse `filename` as the
# Monitor target, the callback log_dir and the model save path.
filename = 'tmp/gym/multiinput_256'

# Anchor the run directory to the kernel's current working directory and make
# sure it exists (a no-op when it is already there).
cwd = os.getcwd()
new_folder = os.path.join(cwd, filename)
os.makedirs(name=new_folder, exist_ok=True)

In [3]:
# Settings for the training environment, collected in one place so they are
# easy to compare with other environment configurations.
train_env_config = dict(
    env_name="Lift",                        # try with other tasks like "Stack" and "Door"
    robots="UR5e",                          # try with other robots like "Sawyer" and "Jaco"
    has_renderer=True,
    has_offscreen_renderer=True,
    use_object_obs=False,                   # don't provide object observations to agent
    use_camera_obs=True,
    camera_names="robot0_eye_in_hand",      # the "robot0_eye_in_hand" camera supplies the image observations
    camera_heights=84,                      # image height
    camera_widths=84,                       # image width
    reward_shaping=True,                    # use a dense reward signal for learning
    horizon=500,
    control_freq=20,                        # control should happen fast enough so that simulation looks smooth
)

# Build the simulation, expose it through the gym wrapper, then wrap it in
# Monitor with `filename` as the log target.
env = Monitor(GymWrapper(suite.make(**train_env_config)), filename)

In [4]:
class SaveOnBestTrainingRewardCallback(BaseCallback):
    """
    Callback for saving a model (the check is done every ``check_freq`` steps)
    based on the training reward (in practice, we recommend using ``EvalCallback``).

    :param check_freq: (int) Number of callback calls between two checks.
    :param log_dir: (str) Path to the folder where the model will be saved.
      It must contain the file created by the ``Monitor`` wrapper.
    :param verbose: (int) Progress messages are printed when greater than 0.
    """

    def __init__(self, check_freq: int, log_dir: str, verbose: int = 1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        # Checkpoint path without extension; the log message in ``_on_step``
        # reports it as ``<save_path>.zip``.
        self.save_path = os.path.join(log_dir, "best_model")
        self.best_mean_reward = -np.inf

    def _init_callback(self) -> None:
        # Ensure the folder that will hold the checkpoint exists. The previous
        # version created a directory at ``save_path`` itself, leaving an empty
        # ``best_model`` folder next to the saved ``best_model.zip``.
        if self.log_dir:
            os.makedirs(self.log_dir, exist_ok=True)

    def _on_step(self) -> bool:
        """
        Every ``check_freq`` calls, compare the recent mean training reward with
        the best one seen so far and save the model when it improves.

        :return: Always ``True``; this callback never requests that training stop.
        """
        if self.n_calls % self.check_freq != 0:
            return True

        # Retrieve training reward
        x, y = ts2xy(load_results(self.log_dir), "timesteps")
        if len(x) == 0:
            # No data returned from the monitor log yet; nothing to compare.
            return True

        # Mean training reward over the last 100 episodes
        mean_reward = np.mean(y[-100:])
        if self.verbose > 0:
            print(f"Num timesteps: {self.num_timesteps}")
            print(
                f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}"
            )

        # New best model: remember the score and save the agent
        if mean_reward > self.best_mean_reward:
            self.best_mean_reward = mean_reward
            if self.verbose > 0:
                print(f"Saving new best model to {self.save_path}.zip")
            self.model.save(self.save_path)

        return True

In [5]:
# Display the wrapped environment's `keys` attribute; the output below shows it
# is a list of observation names (presumably the entries exposed to the policy — verify).
env.keys

  logger.warn(


['robot0_joint_pos_cos',
 'robot0_joint_pos_sin',
 'robot0_joint_vel',
 'robot0_eef_pos',
 'robot0_eef_quat',
 'robot0_gripper_qpos',
 'robot0_gripper_qvel',
 'robot0_eye_in_hand_image',
 'robot0_proprio-state',
 'robot0_eye_in_hand_image',
 'robot0_proprio-state']

In [6]:
from stable_baselines3 import PPO

# Layer sizes handed to the policy via `policy_kwargs`.
policy_kwargs = dict(
    net_arch=[256, 256]
)

# "MultiInputPolicy" is used with the monitored `env` built earlier.
model = PPO("MultiInputPolicy", env, verbose=1, batch_size=256, policy_kwargs=policy_kwargs)

# Every 1000 callback calls, save the model if the mean training reward improved.
callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=filename)

# `total_timesteps` is documented as an int, so pass 500_000 rather than the float 5e5.
model.learn(total_timesteps=500_000, progress_bar=True, log_interval=10, callback=callback)

Using cuda device
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


Output()

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 500      |
|    ep_rew_mean          | 4.78     |
| time/                   |          |
|    fps                  | 91       |
|    iterations           | 10       |
|    time_elapsed         | 223      |
|    total_timesteps      | 20480    |
| train/                  |          |
|    approx_kl            | 0.044494 |
|    clip_fraction        | 0.335    |
|    clip_range           | 0.2      |
|    entropy_loss         | -9.86    |
|    explained_variance   | 0.174    |
|    learning_rate        | 0.0003   |
|    loss                 | -0.00169 |
|    n_updates            | 90       |
|    policy_gradient_loss | -0.046   |
|    std                  | 0.989    |
|    value_loss           | 0.213    |
--------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 500         |
|    ep_rew_mean          | 8.4         |
| time/                   |             |
|    fps                  | 91          |
|    iterations           | 20          |
|    time_elapsed         | 447         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.111041635 |
|    clip_fraction        | 0.477       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.69       |
|    explained_variance   | 0.345       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0166     |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.0553     |
|    std                  | 0.966       |
|    value_loss           | 0.269       |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 17.3       |
| time/                   |            |
|    fps                  | 90         |
|    iterations           | 30         |
|    time_elapsed         | 677        |
|    total_timesteps      | 61440      |
| train/                  |            |
|    approx_kl            | 0.13339072 |
|    clip_fraction        | 0.544      |
|    clip_range           | 0.2        |
|    entropy_loss         | -9.61      |
|    explained_variance   | 0.342      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0624    |
|    n_updates            | 290        |
|    policy_gradient_loss | -0.0648    |
|    std                  | 0.955      |
|    value_loss           | 0.391      |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 29.7       |
| time/                   |            |
|    fps                  | 90         |
|    iterations           | 40         |
|    time_elapsed         | 908        |
|    total_timesteps      | 81920      |
| train/                  |            |
|    approx_kl            | 0.22554597 |
|    clip_fraction        | 0.542      |
|    clip_range           | 0.2        |
|    entropy_loss         | -9.52      |
|    explained_variance   | 0.749      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0988    |
|    n_updates            | 390        |
|    policy_gradient_loss | -0.0662    |
|    std                  | 0.942      |
|    value_loss           | 0.452      |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 38         |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 50         |
|    time_elapsed         | 1138       |
|    total_timesteps      | 102400     |
| train/                  |            |
|    approx_kl            | 0.23873079 |
|    clip_fraction        | 0.63       |
|    clip_range           | 0.2        |
|    entropy_loss         | -9.31      |
|    explained_variance   | 0.877      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0366     |
|    n_updates            | 490        |
|    policy_gradient_loss | -0.0611    |
|    std                  | 0.913      |
|    value_loss           | 0.729      |
----------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 500       |
|    ep_rew_mean          | 37.5      |
| time/                   |           |
|    fps                  | 89        |
|    iterations           | 60        |
|    time_elapsed         | 1369      |
|    total_timesteps      | 122880    |
| train/                  |           |
|    approx_kl            | 0.3654223 |
|    clip_fraction        | 0.645     |
|    clip_range           | 0.2       |
|    entropy_loss         | -9.08     |
|    explained_variance   | 0.907     |
|    learning_rate        | 0.0003    |
|    loss                 | -0.0785   |
|    n_updates            | 590       |
|    policy_gradient_loss | -0.0726   |
|    std                  | 0.883     |
|    value_loss           | 0.119     |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 500       |
|    ep_rew_mean          | 36.1      |
| time/                   |           |
|    fps                  | 89        |
|    iterations           | 70        |
|    time_elapsed         | 1598      |
|    total_timesteps      | 143360    |
| train/                  |           |
|    approx_kl            | 0.5254365 |
|    clip_fraction        | 0.683     |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.87     |
|    explained_variance   | 0.9       |
|    learning_rate        | 0.0003    |
|    loss                 | -0.105    |
|    n_updates            | 690       |
|    policy_gradient_loss | -0.0778   |
|    std                  | 0.858     |
|    value_loss           | 0.184     |
---------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 35.4       |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 80         |
|    time_elapsed         | 1831       |
|    total_timesteps      | 163840     |
| train/                  |            |
|    approx_kl            | 0.24545756 |
|    clip_fraction        | 0.625      |
|    clip_range           | 0.2        |
|    entropy_loss         | -8.7       |
|    explained_variance   | 0.906      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0991    |
|    n_updates            | 790        |
|    policy_gradient_loss | -0.0683    |
|    std                  | 0.838      |
|    value_loss           | 0.399      |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 41.5       |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 90         |
|    time_elapsed         | 2069       |
|    total_timesteps      | 184320     |
| train/                  |            |
|    approx_kl            | 0.37441644 |
|    clip_fraction        | 0.652      |
|    clip_range           | 0.2        |
|    entropy_loss         | -8.43      |
|    explained_variance   | 0.909      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0965    |
|    n_updates            | 890        |
|    policy_gradient_loss | -0.0788    |
|    std                  | 0.805      |
|    value_loss           | 0.347      |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 45.9       |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 100        |
|    time_elapsed         | 2299       |
|    total_timesteps      | 204800     |
| train/                  |            |
|    approx_kl            | 0.36002478 |
|    clip_fraction        | 0.638      |
|    clip_range           | 0.2        |
|    entropy_loss         | -8.13      |
|    explained_variance   | 0.945      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0901    |
|    n_updates            | 990        |
|    policy_gradient_loss | -0.0678    |
|    std                  | 0.773      |
|    value_loss           | 0.44       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 43         |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 110        |
|    time_elapsed         | 2525       |
|    total_timesteps      | 225280     |
| train/                  |            |
|    approx_kl            | 0.40526617 |
|    clip_fraction        | 0.677      |
|    clip_range           | 0.2        |
|    entropy_loss         | -7.91      |
|    explained_variance   | 0.874      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0866    |
|    n_updates            | 1090       |
|    policy_gradient_loss | -0.0722    |
|    std                  | 0.747      |
|    value_loss           | 0.387      |
----------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 500       |
|    ep_rew_mean          | 37.8      |
| time/                   |           |
|    fps                  | 89        |
|    iterations           | 120       |
|    time_elapsed         | 2747      |
|    total_timesteps      | 245760    |
| train/                  |           |
|    approx_kl            | 0.7008157 |
|    clip_fraction        | 0.695     |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.62     |
|    explained_variance   | 0.929     |
|    learning_rate        | 0.0003    |
|    loss                 | -0.0917   |
|    n_updates            | 1190      |
|    policy_gradient_loss | -0.0662   |
|    std                  | 0.719     |
|    value_loss           | 0.148     |
---------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 500       |
|    ep_rew_mean          | 29.5      |
| time/                   |           |
|    fps                  | 89        |
|    iterations           | 130       |
|    time_elapsed         | 2970      |
|    total_timesteps      | 266240    |
| train/                  |           |
|    approx_kl            | 0.9167501 |
|    clip_fraction        | 0.714     |
|    clip_range           | 0.2       |
|    entropy_loss         | -7.47     |
|    explained_variance   | 0.849     |
|    learning_rate        | 0.0003    |
|    loss                 | -0.0798   |
|    n_updates            | 1290      |
|    policy_gradient_loss | -0.0865   |
|    std                  | 0.704     |
|    value_loss           | 0.283     |
---------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 35.6       |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 140        |
|    time_elapsed         | 3208       |
|    total_timesteps      | 286720     |
| train/                  |            |
|    approx_kl            | 0.38028213 |
|    clip_fraction        | 0.691      |
|    clip_range           | 0.2        |
|    entropy_loss         | -7.15      |
|    explained_variance   | 0.905      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0692    |
|    n_updates            | 1390       |
|    policy_gradient_loss | -0.0588    |
|    std                  | 0.673      |
|    value_loss           | 0.387      |
----------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 500       |
|    ep_rew_mean          | 54.3      |
| time/                   |           |
|    fps                  | 89        |
|    iterations           | 150       |
|    time_elapsed         | 3447      |
|    total_timesteps      | 307200    |
| train/                  |           |
|    approx_kl            | 0.5715065 |
|    clip_fraction        | 0.735     |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.92     |
|    explained_variance   | 0.942     |
|    learning_rate        | 0.0003    |
|    loss                 | -0.0824   |
|    n_updates            | 1490      |
|    policy_gradient_loss | -0.0574   |
|    std                  | 0.649     |
|    value_loss           | 0.423     |
---------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 71.2       |
| time/                   |            |
|    fps                  | 88         |
|    iterations           | 160        |
|    time_elapsed         | 3688       |
|    total_timesteps      | 327680     |
| train/                  |            |
|    approx_kl            | 0.50131917 |
|    clip_fraction        | 0.7        |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.72      |
|    explained_variance   | 0.971      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0728    |
|    n_updates            | 1590       |
|    policy_gradient_loss | -0.0651    |
|    std                  | 0.631      |
|    value_loss           | 0.256      |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 79         |
| time/                   |            |
|    fps                  | 88         |
|    iterations           | 170        |
|    time_elapsed         | 3917       |
|    total_timesteps      | 348160     |
| train/                  |            |
|    approx_kl            | 0.49974716 |
|    clip_fraction        | 0.661      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.52      |
|    explained_variance   | 0.927      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0785    |
|    n_updates            | 1690       |
|    policy_gradient_loss | -0.0657    |
|    std                  | 0.614      |
|    value_loss           | 0.262      |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 74.3       |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 180        |
|    time_elapsed         | 4137       |
|    total_timesteps      | 368640     |
| train/                  |            |
|    approx_kl            | 0.36858672 |
|    clip_fraction        | 0.678      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.25      |
|    explained_variance   | 0.912      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0725    |
|    n_updates            | 1790       |
|    policy_gradient_loss | -0.0589    |
|    std                  | 0.592      |
|    value_loss           | 0.556      |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 68.4       |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 190        |
|    time_elapsed         | 4358       |
|    total_timesteps      | 389120     |
| train/                  |            |
|    approx_kl            | 0.36234456 |
|    clip_fraction        | 0.671      |
|    clip_range           | 0.2        |
|    entropy_loss         | -6.05      |
|    explained_variance   | 0.966      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0463    |
|    n_updates            | 1890       |
|    policy_gradient_loss | -0.0508    |
|    std                  | 0.574      |
|    value_loss           | 0.305      |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 86.8       |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 200        |
|    time_elapsed         | 4574       |
|    total_timesteps      | 409600     |
| train/                  |            |
|    approx_kl            | 0.36100337 |
|    clip_fraction        | 0.675      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.82      |
|    explained_variance   | 0.955      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0736    |
|    n_updates            | 1990       |
|    policy_gradient_loss | -0.0473    |
|    std                  | 0.556      |
|    value_loss           | 0.694      |
----------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 500       |
|    ep_rew_mean          | 97.7      |
| time/                   |           |
|    fps                  | 89        |
|    iterations           | 210       |
|    time_elapsed         | 4795      |
|    total_timesteps      | 430080    |
| train/                  |           |
|    approx_kl            | 0.5653002 |
|    clip_fraction        | 0.748     |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.59     |
|    explained_variance   | 0.968     |
|    learning_rate        | 0.0003    |
|    loss                 | -0.0703   |
|    n_updates            | 2090      |
|    policy_gradient_loss | -0.0543   |
|    std                  | 0.538     |
|    value_loss           | 0.439     |
---------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 107        |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 220        |
|    time_elapsed         | 5017       |
|    total_timesteps      | 450560     |
| train/                  |            |
|    approx_kl            | 0.50470227 |
|    clip_fraction        | 0.709      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.38      |
|    explained_variance   | 0.922      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.132      |
|    n_updates            | 2190       |
|    policy_gradient_loss | -0.0283    |
|    std                  | 0.523      |
|    value_loss           | 1.46       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 92.1       |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 230        |
|    time_elapsed         | 5248       |
|    total_timesteps      | 471040     |
| train/                  |            |
|    approx_kl            | 0.62536776 |
|    clip_fraction        | 0.691      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.18      |
|    explained_variance   | 0.951      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0135    |
|    n_updates            | 2290       |
|    policy_gradient_loss | -0.0446    |
|    std                  | 0.508      |
|    value_loss           | 0.44       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 500        |
|    ep_rew_mean          | 76.7       |
| time/                   |            |
|    fps                  | 89         |
|    iterations           | 240        |
|    time_elapsed         | 5477       |
|    total_timesteps      | 491520     |
| train/                  |            |
|    approx_kl            | 0.34418285 |
|    clip_fraction        | 0.656      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.99      |
|    explained_variance   | 0.973      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.019     |
|    n_updates            | 2390       |
|    policy_gradient_loss | -0.0242    |
|    std                  | 0.496      |
|    value_loss           | 0.39       |
----------------------------------------


<stable_baselines3.ppo.ppo.PPO at 0x7fd51cf94150>

In [None]:
# Persist the model currently in memory under the run path stored in `filename`.
# NOTE(review): a later cell reloads from 'trained_model', not from this path —
# confirm which checkpoint is intended.
model.save(filename)

In [8]:
# Roll the trained policy out for at most 500 steps, rendering each step and
# recording every action in `action_list` so the episode can be replayed later.
obs = env.reset()[0]
action_list = []
for _step in range(500):
    # predict() returns (action, recurrent state); the state is not needed here.
    action, _state = model.predict(obs)
    action_list.append(action)
    # step() returns separate `terminated` and `truncated` flags. Unpacking both
    # into one name let the second overwrite the first, so termination was ignored.
    obs, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
    env.render()
    if done:
        break
env.close()

QObject::moveToThread: Current thread (0xe9abfb0) is not the object's thread (0xcd03960).
Cannot move to target thread (0xe9abfb0)

QObject::moveToThread: Current thread (0xe9abfb0) is not the object's thread (0xcd03960).
Cannot move to target thread (0xe9abfb0)

QObject::moveToThread: Current thread (0xe9abfb0) is not the object's thread (0xcd03960).
Cannot move to target thread (0xe9abfb0)

QObject::moveToThread: Current thread (0xe9abfb0) is not the object's thread (0xcd03960).
Cannot move to target thread (0xe9abfb0)

QObject::moveToThread: Current thread (0xe9abfb0) is not the object's thread (0xcd03960).
Cannot move to target thread (0xe9abfb0)

QObject::moveToThread: Current thread (0xe9abfb0) is not the object's thread (0xcd03960).
Cannot move to target thread (0xe9abfb0)

QObject::moveToThread: Current thread (0xe9abfb0) is not the object's thread (0xcd03960).
Cannot move to target thread (0xe9abfb0)

QObject::moveToThread: Current thread (0xe9abfb0) is not the object's thread

ValueError: I/O operation on closed file.

In [None]:
# Reload a saved PPO model, continue training it for 100000 timesteps on `env`,
# and write the result back to the same 'trained_model' path.
# NOTE(review): 'trained_model' is not written by any earlier cell shown here
# (the earlier save uses `filename`) — confirm the checkpoint exists.
# NOTE(review): the rollout cell calls env.close(); presumably `env` has to be
# re-created before this cell runs — verify.
from stable_baselines3 import PPO
model = PPO.load('trained_model', env=env)
model.learn(total_timesteps=100000, progress_bar=True, log_interval=10)
model.save('trained_model')


In [None]:
# Report whether PyTorch can see a CUDA device from this kernel.
import torch
torch.cuda.is_available()

In [None]:
import robosuite.macros as macros

# Switch robosuite's image convention to "opencv" before the env is built
# (presumably so frames have the orientation the video writer expects — TODO confirm).
macros.IMAGE_CONVENTION = "opencv"

# Settings for the recording environment: off-screen rendering only, with a
# 512x512 "agentview" camera.
video_env_config = dict(
    env_name="Lift",                        # try with other tasks like "Stack" and "Door"
    robots="UR5e",                          # try with other robots like "Sawyer" and "Jaco"
    has_renderer=False,
    ignore_done=True,
    has_offscreen_renderer=True,
    use_object_obs=True,                    # object observations are enabled in this env
    use_camera_obs=True,
    camera_names="agentview",               # use "agentview" camera for observations
    camera_heights=512,                     # image height
    camera_widths=512,                      # image width
    reward_shaping=True,                    # use a dense reward signal for learning
    horizon=500,
    control_freq=20,                        # control should happen fast enough so that simulation looks smooth
)

# Replaces the previous `env` with the recording environment.
env = GymWrapper(suite.make(**video_env_config))

In [None]:
import imageio

# Replay the recorded actions in the recording environment and append each
# "agentview" frame to an MP4 inside the run directory (fps=20, the same value
# as the env's control_freq).
# NOTE(review): the actions were recorded in a different, separately reset env;
# presumably the replay only matches the original rollout if the initial states
# agree — verify.
obs = env.reset()[0]

# The context manager closes the video file even if a step raises.
with imageio.get_writer(f'{filename}/video.mp4', fps=20) as writer:
    for i, v in enumerate(action_list):
        # Keep `terminated` and `truncated` separate; unpacking both into one
        # name let the second overwrite the first.
        obs, reward, terminated, truncated, info = env.step(v)
        done = terminated or truncated
        frame = obs["agentview_image"]
        writer.append_data(frame)
        print("Saving frame #{}".format(i))
        if done:
            break
env.close()