In [1]:
# %load_ext autoreload
# %autoreload 2

# When True, the import cell switches Warp to debug mode and enables CUDA
# verification before wp.init() is called.
DEBUG = False

In [2]:
%matplotlib widget
import matplotlib.pyplot as plt

import torch
import numpy as np

import warp as wp
import time
import seaborn as sns
import imageio

# Opt-in Warp diagnostics; these config fields are assigned before wp.init() runs.
if DEBUG:
    wp.config.mode = "debug"
    wp.config.verify_cuda = True
    # wp.config.print_launches = True

wp.init()

from warp.envs.environment import RenderMode
from warp.envs.cartpole_swing_up import CartPoleSwingUpEnv
from shac.envs.cartpole_swing_up import CartPoleSwingUpEnv as DFCartPoleSwingUpEnv
from warp.envs.hopper import HopperEnv
from shac.envs.hopper import HopperEnv as DFHopperEnv

Warp 0.8.2 initialized:
   CUDA Toolkit: 12.0, Driver: 12.1
   Devices:
     "cpu"    | x86_64
     "cuda:0" | NVIDIA GeForce RTX 3090 (sm_86)
   Kernel cache: /home/ksrini/.cache/warp/0.8.2
Using cached kernels


In [None]:
env.renderer.move_camera()

In [None]:
env.step(

In [None]:
env = HopperEnv(num_envs = 2, render=True)

In [None]:
# Throughput benchmark: build HopperEnv with 2, 4, ..., 2**15 parallel envs and
# record what run() returns for each size.
# NOTE(review): run() is assumed to return steps/second — confirm.
env_size, env_times = [], []

for exponent in range(1, 16):
    env_count = 2 ** exponent
    robot = HopperEnv(num_envs=env_count, render=False, requires_grad=True)
    env_times.append(robot.run())
    env_size.append(env_count)

# dump times
for count, rate in zip(env_size, env_times):
    print(f"envs: {count} steps/second: {rate}")

# Log-log plot of throughput against the number of environments.
plt.figure(1)
ax = plt.gca()
ax.plot(env_size, env_times)
ax.set_xscale('log')
ax.set_xlabel("Number of Envs")
ax.set_yscale('log')
ax.set_ylabel("Steps/Second")
plt.show()

In [None]:
# Set the visualize flag on the existing env, then call initialize_renderer().
# NOTE(review): presumably attaches a renderer without recreating the env — confirm.
env.visualize=True
env.initialize_renderer()

In [None]:
# Reset, then take one step with an independently sampled action per environment.
env.reset()
# Stack the per-env samples into a single ndarray first: building a tensor
# directly from a list of ndarrays is slow and triggers a PyTorch UserWarning.
ac = torch.tensor(np.array([env.action_space.sample() for _ in range(env.num_envs)]),
                  device=env.device)
obs, r, d, i = env.step(ac)

In [None]:
%time
# if env.render_mode == RenderMode.TINY:
#     writer = imageio.get_writer("test_render.mp4", fps=30)
# else:
#     writer = None
env.reset()
for _ in range(1000):
    ac = torch.tensor([env.action_space.sample()*0 for _ in range(env.num_envs)],
                      device=env.device)
    obs, r, d, i = env.step(ac)
    env.render()
    # img = env.render(mode="rgb_array")
    # if writer: writer.append_data(img)
    if d.sum() > 0: break
# if writer: writer.close()

In [None]:
# Rebind `env` to a new two-environment HopperEnv constructed with render=True.
env = HopperEnv(num_envs=2, render=True)

In [None]:
# Reset the env and keep whatever reset() returns as `obs`.
obs = env.reset()

In [None]:
# Step once with an all-zero action, then request an "rgb_array" render of the env.
zero_action = torch.zeros((env.num_envs, env.num_acts), device=env.device)
env.step(zero_action)
img = env.render(mode="rgb_array")

# Close every open figure before drawing the new frame.
plt.close('all')
plt.imshow(img)

In [None]:
# Inspect state_0.body_q as a NumPy array (displayed as the cell output).
env.state_0.body_q.numpy()

# Randomized Sampling

In [None]:
from shac.algorithms.mpc2 import Policy, Planner
from tqdm.notebook import trange

In [None]:
# A 100-env batch plus a single rendered env, both with 240-step episodes;
# each is handed to its own Planner in a later cell.
env = CartPoleSwingUpEnv(num_envs=100, episode_length=240)
eval_env = CartPoleSwingUpEnv(num_envs=1, episode_length=240, render=True)
# Reset both; the trailing semicolon keeps the notebook from echoing the result.
env.reset()
eval_env.reset();

In [None]:
# Compare the joint_q shapes of the batched env and the single eval env.
env.joint_q.shape, eval_env.joint_q.shape

In [None]:
# One Policy object shared by two planners: one bound to the batched env and
# one bound to the single rendered eval env.
p = Policy(num_actions=env.num_actions, horizon=0.5, max_steps=10)
plan, eval_plan = Planner(p, env), Planner(p, eval_env)

eval_obs = []
eval_reward = []

for step in trange(240):
    # Optimise on the batched planner, then advance the eval planner by one step.
    plan.optimize_policy()
    obs, reward = eval_plan.one_step(step, p)
    eval_obs += [obs]
    eval_reward += [reward]
    # NOTE(review): copy_eval presumably syncs the batched planner with the
    # eval planner's state — confirm against Planner.copy_eval.
    plan.copy_eval(eval_plan)

In [None]:
# Plot the per-step evaluation rewards collected by the planning loop.
plt.close("all")
reward_curve = [rew.detach().cpu().numpy() for rew in eval_reward]
plt.plot(reward_curve)
plt.show()

In [None]:
# Display how many parallel environments `env` currently holds.
env.num_envs

In [None]:
# Rebuild the eval env with 30-step episodes and stage_path
# "test-rs-planner-cartpole", then bind a new planner to it.
eval_env = CartPoleSwingUpEnv(
    num_envs=1,
    episode_length=30,
    render=True,
    stage_path="test-rs-planner-cartpole",
)
eval_plan = Planner(p, eval_env)

In [None]:
# Run the eval planner's rollout with render=True.
eval_plan.rollout(render=True)

In [None]:
# Call calculateObservations() on the eval env and display its return value.
eval_env.calculateObservations()

In [None]:
# Inspect the eval env's rew_buf attribute.
eval_env.rew_buf

# Use a pre-trained SHAC policy

In [4]:
from warp.envs.environment import RenderMode
from warp.envs.cartpole_swing_up import CartPoleSwingUpEnv
# Class-level override: instances that do not set their own render_mode see
# RenderMode.TINY, which the Monitor wrapper defined later asserts on.
CartPoleSwingUpEnv.render_mode = RenderMode.TINY
from shac.algorithms.shac import SHAC
import yaml

In [5]:
from gym.wrappers import Monitor

In [6]:
# Output directory of the SHAC training run to load from.
load_path = "../../../shac/scripts/outputs/2023-04-11/23-22-50/logs/tmp/shac/04-11-2023-23-22-50"

# Config file and best-policy checkpoint both live directly in that directory.
config_file = load_path + "/cfg.yaml"
ckpt_path = load_path + "/best_policy.pt"

In [7]:
# Load the training config; the context manager closes the file handle, which
# the bare open() call never did.
with open(config_file, "r") as cfg_stream:
    # NOTE(review): yaml.Loader can construct arbitrary Python objects — only
    # load config files from trusted training runs.
    cfg_train = yaml.load(cfg_stream, Loader=yaml.Loader)

# Overrides applied to the environment section of the loaded config.
cfg_train['params']['diff_env'].update(
    name="CartPoleSwingUpWarpEnv",
    render=True,
    no_grad=True,
    stochastic_init=True,
)
# Use a single actor.
cfg_train['params']['config']['num_actors'] = 1


In [8]:
# Construct the SHAC object from the modified training config.
traj_opt = SHAC(cfg_train)

Setting seed: 0
Running with stochastic_init:  True




Module warp.sim.collide load on device 'cuda:0' took 38.45 ms
Allocating 16 out of 16 potential rigid contacts
Initializing renderer writing to path: outputs/CartPoleSwingUpEnv_1
Loaded EGL 1.5 after reload.
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=NVIDIA GeForce RTX 3090/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 520.61.05
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 520.61.05
Vendor = NVIDIA Corporation
Renderer = NVIDIA GeForce RTX 3090/PCIe/SSE2
num_envs =  1
num_actions =  1
num_obs =  5
Sequential(
  (0): Linear(in_features=5, out_features=64, bias=True)
  (1): ELU(alpha=1.0)
  (2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
  (3): Linear(in_features=64, out_features=64, bias=True)
  (4): ELU(alpha=1.0)
  (5): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
  (6): Linear(in_features=64, out_features=1, bias=True)
  (7): Identity()
)
Parameter containing:
tensor([-1.], device='cuda:0', requires_grad=True)
Sequential(
  (0): Linear(in_features=5, out_feature

In [11]:
!mkdir outputs/videos

In [20]:
# Scratch introspection: show the signature/docstring of get_writer.
# NOTE(review): get_writer is only imported from imageio in a later cell, so this
# fails on a fresh kernel run top to bottom.
?get_writer

[0;31mSignature:[0m [0mget_writer[0m[0;34m([0m[0muri[0m[0;34m,[0m [0mformat[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mmode[0m[0;34m=[0m[0;34m'?'[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
get_writer(uri, format=None, mode='?', **kwargs)

Returns a :class:`.Writer` object which can be used to write data
and meta data to the specified file.

Parameters
----------
uri : {str, pathlib.Path, file}
    The resource to write the image to, e.g. a filename, pathlib.Path
    or file object, see the docs for more info.
format : str
    The format to use to write the file. By default imageio selects
    the appropriate for you based on the filename.
mode : {'i', 'I', 'v', 'V', '?'}
    Used to give the writer a hint on what the user expects (default '?'):
    "i" for an image, "I" for multiple images, "v" for a volume,
    "V" for multiple volumes, "?" for don't care.
kwargs : ...
    Further keyword arguments are pas

In [89]:
import os
from imageio import get_writer

class Monitor:
    """Environment wrapper that records rendered frames of episodes to mp4 files.

    Each call to ``reset`` closes the video of the previous episode and, if the
    episode passes ``ep_filter``, opens ``<save_dir>/ep-<index>.mp4``. Each call
    to ``step`` appends one rendered frame while a video is open. Any public
    attribute not defined on the wrapper is looked up on the wrapped env.

    Args:
        env: Wrapped environment. Its ``render_mode`` must equal
            ``RenderMode.TINY``. It must provide ``reset``, ``step``,
            ``render(mode="rgb_array")``, ``close``, ``dt`` and
            ``renderer.move_camera``.
        save_dir: Directory for the videos; a falsy value selects ``"./videos/"``.
            The directory is created if it does not exist.
        ep_filter: Optional callable taking the zero-based episode index and
            returning a truthy value when that episode should be recorded.
            ``None`` records every episode.
    """

    def __init__(self, env, save_dir, ep_filter=None):
        self.env = env
        assert self.env.render_mode == RenderMode.TINY, \
            "Monitor requires an env whose render_mode is RenderMode.TINY"
        self.writer = None
        self.save_dir = save_dir or "./videos/"
        os.makedirs(self.save_dir, exist_ok=True)
        self.ep_filter = ep_filter
        self.num_episodes = 0

    def _video_fps(self):
        """Return the frame rate matching the env time step, rounded to the nearest integer (>= 1)."""
        # round() rather than int(): truncation turns e.g. 29.94 into 29 fps.
        return max(1, int(round(1.0 / self.env.dt)))

    def _close_writer(self):
        """Close the current video writer, if any, and forget it."""
        # Clear the attribute first so a failing close() can never be retried on
        # the same writer.
        writer, self.writer = self.writer, None
        if writer is not None:
            writer.close()

    def reset(self, *args, **kwargs):
        """Reset the wrapped env, finish the previous video and maybe start a new one.

        All arguments are forwarded to ``env.reset`` and its return value is
        returned unchanged.
        """
        ret = self.env.reset(*args, **kwargs)
        self.env.renderer.move_camera(np.zeros(3), 5, 225, -20)  # resets default camera pose
        self._close_writer()
        if self.ep_filter is None or self.ep_filter(self.num_episodes):
            video_path = os.path.join(self.save_dir, f"ep-{self.num_episodes}.mp4")
            self.writer = get_writer(video_path, fps=self._video_fps())
        self.num_episodes += 1
        return ret

    def step(self, action):
        """Step the wrapped env and, while recording, append one frame to the video."""
        res = self.env.step(action)
        if self.writer is not None:
            self.render()
        return res

    def render(self):
        """Render one "rgb_array" frame and append it to the open video.

        Does nothing (and does not render) when no video is being recorded.
        """
        if self.writer is None:
            return
        img = self.env.render(mode="rgb_array")
        # The frame is scaled by 255 and stored as uint8; clip first so values
        # outside [0, 1] saturate instead of wrapping around.
        frame = np.clip(255 * img, 0, 255).astype(np.uint8)
        self.writer.append_data(frame)
        return

    def __getattr__(self, name):
        """Delegate lookups of missing public attributes to the wrapped env."""
        if name.startswith('_'):
            raise AttributeError("attempted to get missing private attribute '{}'".format(name))
        if name == "env":
            # Reached only when `env` has not been set on the instance (e.g. an
            # object made with __new__); delegating would recurse forever.
            raise AttributeError("Monitor has no wrapped 'env' attribute yet")
        return getattr(self.env, name)

    def close(self):
        """Finish any open video, then close the wrapped env."""
        try:
            self._close_writer()
        finally:
            # The env is closed even if finalising the video fails.
            self.env.close()

In [90]:
# Wrap the env nested inside traj_opt.env with the recording Monitor; videos go
# to ./outputs/videos.
env = Monitor(traj_opt.env.env, "./outputs/videos")

In [91]:
# Make the SHAC object use the Monitor-wrapped env from now on.
traj_opt.env = env

In [92]:
# Call SHAC.load with the best-policy checkpoint path built earlier.
traj_opt.load(ckpt_path)

In [93]:
# Ten evaluation calls; the captured output shows one "loss = ..., len = ..." line
# per call.
# NOTE(review): assumes evaluate_policy resets the env itself (and so triggers
# Monitor.reset / a new video) — confirm, since the explicit reset is commented out.
for _ in range(10):
    # env.reset()
    traj_opt.evaluate_policy(1)

loss = 886.66, len = 240
loss = 206037.48, len = 240
loss = 38344.06, len = 240
loss = 11525.76, len = 240
loss = 1070.44, len = 240
loss = 8177.70, len = 240
loss = 2404.74, len = 240
loss = 927.82, len = 240
loss = 2369.29, len = 240
loss = 12297.32, len = 240


In [84]:
from IPython.display import Video

In [88]:
# NOTE(review): "ep-13" is a hard-coded episode index. Monitor names files
# ep-<n>.mp4 counting resets from 0, so this file only exists after at least
# 14 recorded resets — confirm before re-running.
Video("./outputs/videos/ep-13.mp4")