In [1]:
import argparse
import copy 
import multiprocessing as mp
import pathlib
import pickle
import time
from typing import Sequence, Tuple, cast
import os 

import gym.wrappers
import numpy as np
import omegaconf
import skvideo.io
import torch

import mbrl.planning;
import mbrl.util
from mbrl.util.env import EnvHandler
from mbrl.util.pybullet import FreezePybullet, PybulletEnvHandler

import tactile_gym.rl_envs
from tactile_gym.sb3_helpers.params import import_parameters

# produce a display to render image
from pyvirtualdisplay import Display
_display = Display(visible=False, size=(1400, 900))
_ = _display.start()

pybullet build time: Mar  8 2021 17:26:24


In [2]:
algo_name = 'ppo'
env_name = 'object_push-v0'
rl_params, algo_params, augmentations = import_parameters(env_name, algo_name)
rl_params["env_modes"][ 'observation_mode'] = 'tactile_pose_goal_excluded_data'
rl_params["env_modes"][ 'control_mode'] = 'TCP_position_control'
rl_params["env_modes"][ 'terminate_early']  = True
rl_params["env_modes"][ 'use_contact'] = True
rl_params["env_modes"][ 'traj_type'] = 'point'
rl_params["env_modes"][ 'task'] = "goal_pos"
rl_params["env_modes"]['planar_states'] = True

env_kwargs={
    'show_gui':False,
    'show_tactile':False,
    'max_steps':rl_params["max_ep_len"],
    'image_size':rl_params["image_size"],
    'env_modes':rl_params["env_modes"],
}

In [3]:
env = gym.make(env_name, **env_kwargs)
env_dict = {"env": env}

argv[0]=
Loaded EGL 1.5 after reload.
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=NVIDIA GeForce RTX 3090/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 495.29.05
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 495.29.05
Vendor = NVIDIA Corporation
Renderer = NVIDIA GeForce RTX 3090/PCIe/SSE2
ven = NVIDIA Corporation
ven = NVIDIA Corporation


In [4]:
env.reset()
for i in range(10):
    env.step(env.action_space.sample())

# save state
state = PybulletEnvHandler.get_current_state(env_dict)

print("counter at saved", env._env_step_counter)
print("obs saved", env.get_tactile_pose_obs_goal_exluded())
print(env_dict["env"]._env_step_counter)
print(env_dict["env"].get_tactile_pose_obs_goal_exluded())
# print(casted_env._env_step_counter)
# print(casted_env.get_tactile_pose_obs_goal_exluded())

for i in range(200):
    env.step(env.action_space.sample())
print("counter after action", env._env_step_counter)
print("obs after action applied", env.get_tactile_pose_obs_goal_exluded())

# restore state
PybulletEnvHandler.set_env_state(state, env_dict)
env = env_dict["env"]
print("counter after restore", env._env_step_counter)
print("obs after restore", env.get_tactile_pose_obs_goal_exluded())

counter at saved 10
obs saved [0.005668 -0.001877 0.002373 0.999997 0.003428 -0.001897 0.015030 0.999887]
10
[0.005668 -0.001877 0.002373 0.999997 0.003428 -0.001897 0.015030 0.999887]
counter after action 210
obs after action applied [0.102529 0.000676 -0.003912 0.999992 0.098892 0.002402 0.027696 0.999616]
counter after restore 10
obs after restore [0.005691 -0.001871 0.002544 0.999997 0.003428 -0.001897 0.015030 0.999887]


In [5]:
env = gym.make(env_name, **env_kwargs)
env_dict = {"env": env}

argv[0]=
Loaded EGL 1.5 after reload.
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=NVIDIA GeForce RTX 3090/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 495.29.05
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 495.29.05
Vendor = NVIDIA Corporation
Renderer = NVIDIA GeForce RTX 3090/PCIe/SSE2
ven = NVIDIA Corporation
ven = NVIDIA Corporation
Destroy EGL OpenGL window.


In [9]:
env_dict["env"].reset()
for i in range(10):
        env.step(env.action_space.sample())
print("state", env.get_tactile_pose_obs_goal_exluded())
print("state", env_dict["env"].get_tactile_pose_obs_goal_exluded())
action = env.action_space.sample()
with FreezePybullet(env_dict):

    # apply a action
    obs, *_ = env.step(action)
    for i in range(200):
        env.step(env.action_space.sample())
    print("after applied action", env.get_tactile_pose_obs_goal_exluded())
        
print(env.get_tactile_pose_obs_goal_exluded())
print(env_dict["env"].get_tactile_pose_obs_goal_exluded())

# apply action after freezing
obs_expected, *_ = env.step(action)
print(obs)
print(obs_expected)

state [0.005937 0.001620 0.008806 0.999961 0.003142 0.001637 -0.008242 0.999966]
state [0.005937 0.001620 0.008806 0.999961 0.003142 0.001637 -0.008242 0.999966]
after applied action [0.104763 0.003833 -0.015411 0.999881 0.100738 0.004019 -0.024155 0.999708]
[0.006001 0.001692 0.008923 0.999960 0.003142 0.001637 -0.008242 0.999966]
[0.006001 0.001692 0.008923 0.999960 0.003142 0.001637 -0.008242 0.999966]
[0.006538 0.001242 0.010913 0.999940 0.003684 0.001258 -0.006633 0.999978]
[0.006557 0.001295 0.010943 0.999940 0.003698 0.001311 -0.006802 0.999977]


In [9]:
env__: gym.Env
handler__: EnvHandler


def init(env_name: str, seed: int):
    global env__
    global handler__
    handler__ = mbrl.util.create_handler_from_str(env_name)
    env__ = handler__.make_env_from_str(env_name)
    env__.seed(seed)
    env__.reset()


def evaluate_all_action_sequences(
    action_sequences: Sequence[Sequence[np.ndarray]],
    pool: mp.Pool,  # type: ignore
    current_state: Tuple,
) -> torch.Tensor:

    res_objs = [
        pool.apply_async(evaluate_sequence_fn, (sequence, current_state))  # type: ignore
        for sequence in action_sequences
    ]
    res = [res_obj.get() for res_obj in res_objs]
    return torch.tensor(res, dtype=torch.float32)


def evaluate_sequence_fn(action_sequence: np.ndarray, current_state: Tuple) -> float:
    global env__
    global handler__
    # obs0__ is not used (only here for compatibility with rollout_env)
    obs0 = env__.observation_space.sample()
    env = cast(gym.wrappers.TimeLimit, env__)
    handler__.set_env_state(current_state, env)
    _, rewards_, _ = handler__.rollout_env(
        env, obs0, -1, agent=None, plan=action_sequence
    )
    return rewards_.sum().item()


def get_random_trajectory(horizon):
    global env__
    return [env__.action_space.sample() for _ in range(horizon)]

In [10]:
mp.set_start_method("spawn")
handler_env_name = "pybulletgym___" + env_name
handler = mbrl.util.create_handler_from_str(handler_env_name)
eval_env = handler.make_env_from_str(handler_env_name, **env_kwargs)
seed = 0
eval_env.seed(seed)
torch.random.manual_seed(seed)
np.random.seed(seed)
current_obs = eval_env.reset()

argv[0]=
Loaded EGL 1.5 after reload.
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=NVIDIA GeForce RTX 3090/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 495.29.05
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 495.29.05
Vendor = NVIDIA Corporation
Renderer = NVIDIA GeForce RTX 3090/PCIe/SSE2
ven = NVIDIA Corporation
ven = NVIDIA Corporation


In [13]:
global env__
print(env__)

NameError: name 'env__' is not defined

In [20]:
action_sequences = torch.from_numpy(np.array([eval_env.action_space.sample() for _ in range(15)])).float()
action_sequences = torch.tile(action_sequences, (5,1,1))
print(action_sequences.shape)

torch.Size([5, 15, 2])


In [14]:
num_processes = 1
with mp.Pool(
    processes=num_processes, initializer=init, initargs=[handler_env_name, seed]
) as pool__:
    eval_env_dict = {"env": eval_env}
    current_state = handler.get_current_state(
                eval_env_dict
            )

    action_sequences = torch.from_numpy(np.array([env.action_space.sample() for _ in range(15)])).float()
    
    obs0 = env.observation_space.sample()
    env = {"env": env}
    handler.set_env_state(current_state, env)
    
    _, rewards_, _ = handler.rollout_env(
        env, obs0, -1, agent=None, plan=action_sequences
    )
    # print(evaluate_all_action_sequences(
    #                 action_sequences,
    #                 pool__,
    #                 current_state__,
    #             ))
    

argv[0]=
Loaded EGL 1.5 after reload.
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=NVIDIA GeForce RTX 3090/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 495.29.05
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 495.29.05
Vendor = NVIDIA Corporation
Renderer = NVIDIA GeForce RTX 3090/PCIe/SSE2


Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'init' on <module '__main__' (built-in)>


ven = NVIDIA Corporation
ven = NVIDIA Corporation


Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'init' on <module '__main__' (built-in)>


Destroy EGL OpenGL window.
argv[0]=


Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'init' on <module '__main__' (built-in)>


Loaded EGL 1.5 after reload.
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=NVIDIA GeForce RTX 3090/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 495.29.05
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 495.29.05
Vendor = NVIDIA Corporation
Renderer = NVIDIA GeForce RTX 3090/PCIe/SSE2
ven = NVIDIA Corporation


Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'init' on <module '__main__' (built-in)>


ven = NVIDIA Corporation
Destroy EGL OpenGL window.


In [15]:
print(rewards_)

[-0.224126 -0.223911 -0.223646 -0.223238 -0.223014 -0.222526 -0.221922
 -0.221897 -0.221668 -0.221450 -0.221396 -0.221321 -0.220934 -0.220382
 -0.220181]


In [None]:
num_processes = 1
samples_per_process = 512
optimizer_type = "cem"

if optimizer_type == "cem":
    optimizer_cfg = omegaconf.OmegaConf.create(
        {
            "_target_": "mbrl.planning.CEMOptimizer",
            "device": "cpu",
            "num_iterations": 5,
            "elite_ratio": 0.1,
            "population_size": num_processes * samples_per_process,
            "alpha": 0.1,
            "lower_bound": "???",
            "upper_bound": "???",
        }
    )
elif optimizer_type == "mppi":
    optimizer_cfg = omegaconf.OmegaConf.create(
        {
            "_target_": "mbrl.planning.MPPIOptimizer",
            "num_iterations": 5,
            "gamma": 1.0,
            "population_size": num_processes * samples_per_process,
            "sigma": 0.95,
            "beta": 0.1,
            "lower_bound": "???",
            "upper_bound": "???",
            "device": "cpu",
        }
    )
elif optimizer_type == "icem":
    optimizer_cfg = omegaconf.OmegaConf.create(
        {
            "_target_": "mbrl.planning.ICEMOptimizer",
            "num_iterations": 2,
            "elite_ratio": 0.1,
            "population_size": num_processes * samples_per_process,
            "population_decay_factor": 1.25,
            "colored_noise_exponent": 2.0,
            "keep_elite_frac": 0.1,
            "alpha": 0.1,
            "lower_bound": "???",
            "upper_bound": "???",
            "return_mean_elites": "true",
            "device": "cpu",
        }
    )
else:
    raise ValueError

controller = mbrl.planning.TrajectoryOptimizer(
        optimizer_cfg,
        eval_env.action_space.low,
        eval_env.action_space.high,
        10,
    )

  if OmegaConf.is_none(config):


In [None]:
work_dir = os.path.join(os.getcwd(), 'saved_control')
if not os.path.exists(work_dir):
    os.mkdir(work_dir)

render = True
num_steps = 1000

In [None]:
with mp.Pool(
    processes=num_processes, initializer=init, initargs=[handler_env_name, seed]
) as pool__:

    total_reward__ = 0
    frames = []
    max_population_size = optimizer_cfg.population_size
    if isinstance(controller.optimizer, mbrl.planning.ICEMOptimizer):
        max_population_size += controller.optimizer.keep_elite_size
    value_history = np.zeros(
        (num_steps, max_population_size, optimizer_cfg.num_iterations)
    )
    values_sizes = []  # for icem
    for t in range(num_steps):
        if render:
            frames.append(eval_env.render(mode="rgb_array"))
        start = time.time()

        current_state__ = handler.get_current_state(
            cast(gym.wrappers.TimeLimit, eval_env)
        )

        def trajectory_eval_fn(action_sequences):
            return evaluate_all_action_sequences(
                action_sequences,
                pool__,
                current_state__,
            )

        best_value = [0]  # this is hacky, sorry

        def compute_population_stats(_population, values, opt_step):
            value_history[t, : len(values), opt_step] = values.numpy()
            values_sizes.append(len(values))
            best_value[0] = max(best_value[0], values.max().item())

        plan = controller.optimize(
            trajectory_eval_fn, callback=compute_population_stats
        )
        action__ = plan[0]
        next_obs__, reward__, done__, _ = eval_env.step(action__)

        total_reward__ += reward__

        print(
            f"step: {t}, time: {time.time() - start: .3f}, "
            f"reward: {reward__: .3f}, pred_value: {best_value[0]: .3f}, "
            f"total_reward: {total_reward__: .3f}"
        )

    output_dir = pathlib.Path(work_dir)
    output_dir = output_dir / handler_env_name / optimizer_type
    pathlib.Path.mkdir(output_dir, exist_ok=True, parents=True)

    if render:
        frames_np = np.stack(frames)
        writer = skvideo.io.FFmpegWriter(
            output_dir / f"control_{handler_env_name}_video.mp4", verbosity=1
        )
        for i in range(len(frames_np)):
            writer.writeFrame(frames_np[i, :, :, :])
        writer.close()

    print("total_reward: ", total_reward__)
    np.save(output_dir / "value_history.npy", value_history)


Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'init' on <module '__main__' (built-in)>
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'init' on <module '__main__' (built-in)>
Traceback (most recent call last):
  File "<string>", li

KeyboardInterrupt: 

In [None]:
eval_env._p

AttributeError: 'ObjectPushEnv' object has no attribute '_p'