In [1]:
import argparse
import multiprocessing as mp
import pathlib
import pickle
import time
from typing import Sequence, Tuple, cast
import os 

import gym.wrappers
import numpy as np
import omegaconf
import skvideo.io
import torch

import mbrl.planning;
import mbrl.util
from mbrl.util.env import EnvHandler
from mbrl.util.pybullet import FreezePybullet, PybulletEnvHandler

import tactile_gym.rl_envs
from tactile_gym.sb3_helpers.params import import_parameters

# produce a display to render image
from pyvirtualdisplay import Display
_display = Display(visible=False, size=(1400, 900))
_ = _display.start()

pybullet build time: Mar  8 2021 17:26:24


In [2]:
nv__: gym.Env
handler__: EnvHandler


def init(env_name: str, seed: int):
    global env__
    global handler__
    handler__ = mbrl.util.create_handler_from_str(env_name)
    env__ = handler__.make_env_from_str(env_name)
    env__.seed(seed)
    env__.reset()


def step_env(action: np.ndarray):
    global env__
    return env__.step(action)


def evaluate_all_action_sequences(
    action_sequences: Sequence[Sequence[np.ndarray]],
    pool: mp.Pool,  # type: ignore
    current_state: Tuple,
) -> torch.Tensor:

    res_objs = [
        pool.apply_async(evaluate_sequence_fn, (sequence, current_state))  # type: ignore
        for sequence in action_sequences
    ]
    res = [res_obj.get() for res_obj in res_objs]
    return torch.tensor(res, dtype=torch.float32)


def evaluate_sequence_fn(action_sequence: np.ndarray, current_state: Tuple) -> float:
    global env__
    global handler__
    # obs0__ is not used (only here for compatibility with rollout_env)
    obs0 = env__.observation_space.sample()
    env = cast(gym.wrappers.TimeLimit, env__)
    handler__.set_env_state(current_state, env)
    _, rewards_, _ = handler__.rollout_env(
        env, obs0, -1, agent=None, plan=action_sequence
    )
    return rewards_.sum().item()


def get_random_trajectory(horizon):
    global env__
    return [env__.action_space.sample() for _ in range(horizon)]

In [3]:
algo_name = 'ppo'
env_name = 'object_push-v0'
rl_params, algo_params, augmentations = import_parameters(env_name, algo_name)
rl_params["env_modes"][ 'observation_mode'] = 'tactile_pose_goal_excluded_data'
rl_params["env_modes"][ 'control_mode'] = 'TCP_position_control'
rl_params["env_modes"][ 'terminate_early']  = True
rl_params["env_modes"][ 'use_contact'] = True
rl_params["env_modes"][ 'traj_type'] = 'point'
rl_params["env_modes"][ 'task'] = "goal_pos"
rl_params["env_modes"]['planar_states'] = True

env_kwargs={
    'show_gui':False,
    'show_tactile':False,
    'max_steps':rl_params["max_ep_len"],
    'image_size':rl_params["image_size"],
    'env_modes':rl_params["env_modes"],
}

In [4]:
env = gym.make(env_name, **env_kwargs)
new_env = gym.make(env_name, **env_kwargs)

argv[0]=
Loaded EGL 1.5 after reload.
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=NVIDIA GeForce RTX 3090/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 495.29.05
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 495.29.05
Vendor = NVIDIA Corporation
Renderer = NVIDIA GeForce RTX 3090/PCIe/SSE2
ven = NVIDIA Corporation
ven = NVIDIA Corporation
argv[0]=
Loaded EGL 1.5 after reload.
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=NVIDIA GeForce RTX 3090/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 495.29.05
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 495.29.05
Vendor = NVIDIA Corporation
Renderer = NVIDIA GeForce RTX 3090/PCIe/SSE2
ven = NVIDIA Corporation
ven = NVIDIA Corporation


In [5]:
# x = pickle.dumps(env)
# print(env._pb.isConnected())
# env = pickle.loads(x)
# print(env._pb.isConnected())
# env.connect_pybullet()
# env._pb.isConnected()

In [6]:
# print(env._pb.isConnected())

In [7]:
# env.reset()
# action = env.action_space.sample()
# o1_expected, *_ = env.step(action)
# print(o1_expected)

In [8]:
# env.reset()
# state = PybulletEnvHandler.get_current_state(env)
# for i in range(10):
#     env.step(env.action_space.sample())
# PybulletEnvHandler.set_env_state(state, env)
# o1, *_ = env.step(action)
# print(o1)

In [9]:
# new_env.cur_obj_orn_worldframe

In [10]:
print(env._pb.isConnected())
print(env._env_step_counter)
print(env.cur_obj_orn_worldframe)
print(new_env._pb.isConnected())
print(new_env._env_step_counter)
print(new_env.cur_obj_orn_worldframe)

1
0
[0.707107 0.707107 -0.000000 -0.000000]
1
0
[0.707107 0.707107 -0.000000 -0.000000]


In [11]:
print(env.robot._pb.getNumJoints(env.robot.robot_id))

12


In [12]:
env.reset()
env.step(env.action_space.sample())
print(env.cur_obj_orn_worldframe)
print(new_env.cur_obj_orn_worldframe)
print(env.robot.arm.num_joints)
state = PybulletEnvHandler.get_current_state(env)
((filename, pickle_bytes),) = state
env = pickle.loads(pickle_bytes)
env.connect_pybullet()
env.setup_object()
env.load_environment()
env.load_object()
env.robot.load_robot()
# PybulletEnvHandler.load_state_from_file(env._pb, filename)

[0.706071 0.708141 -0.000021 0.000023]
[0.707107 0.707107 -0.000000 -0.000000]
12
Destroy EGL OpenGL window.
argv[0]=
Loaded EGL 1.5 after reload.
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=NVIDIA GeForce RTX 3090/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 495.29.05
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 495.29.05
Vendor = NVIDIA Corporation
Renderer = NVIDIA GeForce RTX 3090/PCIe/SSE2


4

In [13]:
id = env.robot.load_robot()
print(id)
print(env.robot.robot_id)
print(env.robot._pb.getNumJoints(id))
env.reset()

5
5
12


error: Joint index out-of-range.

In [27]:
PybulletEnvHandler.load_state_from_file(env._pb, filename)

btMultiBodyWorldImporter::convertAllObjects error: expected 8 multibodies, got 6.
restoreState failed

error: Couldn't restore state.

In [21]:
print(env.robot._pb.getNumJoints(env.robot.robot_id))

12


In [None]:
print(env.robot.arm.num_joints)
print(env._pb.resetJointState(env.robot.arm.robot_id, 1, env.robot.arm.rest_poses[1]))

12


error: Joint index out-of-range.

In [None]:
print(env._pb.isConnected())
print(env.robot.base_orn)

0
(0.0, 0.0, 0.0, 1.0)


In [None]:
print(env._pb.isConnected())
print(env._env_step_counter)
print(env.cur_obj_orn_worldframe)
print(new_env._pb.isConnected())
print(new_env._env_step_counter)
print(new_env.cur_obj_orn_worldframe)

1
1
[0.709108 0.705100 -0.000227 0.000227]
1
0
[0.707107 0.707107 -0.000000 -0.000000]


In [None]:
print(new_env._pb.isConnected())
print(new_env._env_step_counter)
print(new_env.cur_obj_orn_worldframe)

1
1
[0.707400 0.706814 -0.000021 0.000018]


In [None]:

print(env.cur_obj_orn_worldframe)
print(new_env.cur_obj_orn_worldframe)
print(env.step(env.action_space.sample()))
print(new_env.step(new_env.action_space.sample()))


[0.706671 0.707543 -0.000017 0.000017]
[0.705866 0.708346 -0.000002 0.000001]
(array([0.002129, 0.000405, 0.007721, 0.999970, -0.000274, 0.000406,
       -0.000740, 1.000000]), -1.7706529672395603, False, {'num_of_pb_steps': 7, 'tip_in_contact': True})
(array([0.002627, 0.000282, 0.008447, 0.999964, 0.000171, 0.000283,
       -0.000031, 1.000000]), -1.7718746149449642, False, {'num_of_pb_steps': 7, 'tip_in_contact': True})


In [None]:
env.reset()
with FreezePybullet(env):
    for i in range(10):
        env.step(env.action_space.sample())
        print(i)

    if i == 9:
        pass

o1, *_ = env.step(action)
print(o1)


0
1
2
3
4
5
6
7
8
9
argv[0]=
Loaded EGL 1.5 after reload.
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=NVIDIA GeForce RTX 3090/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 495.29.05
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 495.29.05
Vendor = NVIDIA Corporation
Renderer = NVIDIA GeForce RTX 3090/PCIe/SSE2
btMultiBodyWorldImporter::convertAllObjects error: expected 0 multibodies, got 6.
restoreState failed

error: Couldn't restore state.

In [None]:
mp.set_start_method("spawn")
handler_env_name = "pybulletgym___" + env_name
handler = mbrl.util.create_handler_from_str(handler_env_name)
eval_env = handler.make_env_from_str(handler_env_name, **env_kwargs)
seed = 0
eval_env.seed(seed)
torch.random.manual_seed(seed)
np.random.seed(seed)
current_obs = eval_env.reset()

pybullet build time: Mar  8 2021 17:26:24


argv[0]=
Loaded EGL 1.5 after reload.
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=NVIDIA GeForce RTX 3090/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 495.29.05
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 495.29.05
Vendor = NVIDIA Corporation
Renderer = NVIDIA GeForce RTX 3090/PCIe/SSE2
ven = NVIDIA Corporation
ven = NVIDIA Corporation


In [None]:
num_processes = 1
samples_per_process = 512
optimizer_type = "cem"

if optimizer_type == "cem":
    optimizer_cfg = omegaconf.OmegaConf.create(
        {
            "_target_": "mbrl.planning.CEMOptimizer",
            "device": "cpu",
            "num_iterations": 5,
            "elite_ratio": 0.1,
            "population_size": num_processes * samples_per_process,
            "alpha": 0.1,
            "lower_bound": "???",
            "upper_bound": "???",
        }
    )
elif optimizer_type == "mppi":
    optimizer_cfg = omegaconf.OmegaConf.create(
        {
            "_target_": "mbrl.planning.MPPIOptimizer",
            "num_iterations": 5,
            "gamma": 1.0,
            "population_size": num_processes * samples_per_process,
            "sigma": 0.95,
            "beta": 0.1,
            "lower_bound": "???",
            "upper_bound": "???",
            "device": "cpu",
        }
    )
elif optimizer_type == "icem":
    optimizer_cfg = omegaconf.OmegaConf.create(
        {
            "_target_": "mbrl.planning.ICEMOptimizer",
            "num_iterations": 2,
            "elite_ratio": 0.1,
            "population_size": num_processes * samples_per_process,
            "population_decay_factor": 1.25,
            "colored_noise_exponent": 2.0,
            "keep_elite_frac": 0.1,
            "alpha": 0.1,
            "lower_bound": "???",
            "upper_bound": "???",
            "return_mean_elites": "true",
            "device": "cpu",
        }
    )
else:
    raise ValueError

controller = mbrl.planning.TrajectoryOptimizer(
        optimizer_cfg,
        eval_env.action_space.low,
        eval_env.action_space.high,
        10,
    )

  if OmegaConf.is_none(config):


In [None]:
work_dir = os.path.join(os.getcwd(), 'saved_control')
if not os.path.exists(work_dir):
    os.mkdir(work_dir)

render = True
num_steps = 1000

In [None]:
import tempfile
eval_env.reset()
bulletfile = tempfile.NamedTemporaryFile(suffix=".bullet").name
eval_env._pb.saveBullet(bulletfile)
pickle_bytes = pickle.dumps(eval_env)
print(eval_env.get_observation())
print(eval_env._env_step_counter)
for i in range(200):
    eval_env.step(eval_env.action_space.sample())
print(eval_env.get_observation())
new_eval_env = pickle.loads(pickle_bytes)
new_eval_env._pb.restoreState(fileName=bulletfile)
print(new_eval_env.get_observation())
print(new_eval_env._env_step_counter)
new_eval_env.step(eval_env.action_space.sample())

[0.000006 1.000000 -0.001000 0.000000 0.000000 1.000000]
0
[-0.049182 0.998790 0.137436 -0.003664 0.045815 0.998950]
[0.000005 1.000000 -0.001000 0.000000 0.000000 1.000000]
0


(array([0.000226, 1.000000, -0.000461, -0.000033, 0.000720, 1.000000]),
 -0.414154334936079,
 False,
 {'num_of_pb_steps': 7, 'tip_in_contact': True})

In [None]:
with mp.Pool(
    processes=num_processes, initializer=init, initargs=[handler_env_name, seed]
) as pool__:

    total_reward__ = 0
    frames = []
    max_population_size = optimizer_cfg.population_size
    if isinstance(controller.optimizer, mbrl.planning.ICEMOptimizer):
        max_population_size += controller.optimizer.keep_elite_size
    value_history = np.zeros(
        (num_steps, max_population_size, optimizer_cfg.num_iterations)
    )
    values_sizes = []  # for icem
    for t in range(num_steps):
        if render:
            frames.append(eval_env.render(mode="rgb_array"))
        start = time.time()

        current_state__ = handler.get_current_state(
            cast(gym.wrappers.TimeLimit, eval_env)
        )

        def trajectory_eval_fn(action_sequences):
            return evaluate_all_action_sequences(
                action_sequences,
                pool__,
                current_state__,
            )

        best_value = [0]  # this is hacky, sorry

        def compute_population_stats(_population, values, opt_step):
            value_history[t, : len(values), opt_step] = values.numpy()
            values_sizes.append(len(values))
            best_value[0] = max(best_value[0], values.max().item())

        plan = controller.optimize(
            trajectory_eval_fn, callback=compute_population_stats
        )
        action__ = plan[0]
        next_obs__, reward__, done__, _ = eval_env.step(action__)

        total_reward__ += reward__

        print(
            f"step: {t}, time: {time.time() - start: .3f}, "
            f"reward: {reward__: .3f}, pred_value: {best_value[0]: .3f}, "
            f"total_reward: {total_reward__: .3f}"
        )

    output_dir = pathlib.Path(work_dir)
    output_dir = output_dir / handler_env_name / optimizer_type
    pathlib.Path.mkdir(output_dir, exist_ok=True, parents=True)

    if render:
        frames_np = np.stack(frames)
        writer = skvideo.io.FFmpegWriter(
            output_dir / f"control_{handler_env_name}_video.mp4", verbosity=1
        )
        for i in range(len(frames_np)):
            writer.writeFrame(frames_np[i, :, :, :])
        writer.close()

    print("total_reward: ", total_reward__)
    np.save(output_dir / "value_history.npy", value_history)


Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'init' on <module '__main__' (built-in)>
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/home/qt21590/anaconda3/envs/tactile_gym_mbrl/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'init' on <module '__main__' (built-in)>
Traceback (most recent call last):
  File "<string>", li

KeyboardInterrupt: 

In [None]:
eval_env._p

AttributeError: 'ObjectPushEnv' object has no attribute '_p'