In [2]:
# Automatically reload edited project modules before each cell is executed.
%load_ext autoreload
%autoreload 2

# Debug switch: when True, the Warp setup cell below enables Warp's debug options.
DEBUG = False

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
%matplotlib widget
import matplotlib.pyplot as plt

import torch
import numpy as np

import warp as wp
import time
import seaborn as sns
import imageio

# Optional Warp debugging, controlled by the DEBUG flag set in the first cell.
# The config attributes are assigned before wp.init() is called below.
if DEBUG:
    wp.config.mode = "debug"
    wp.config.verify_cuda = True
    # wp.config.print_launches = True

wp.init()

from warp.envs.environment import RenderMode
from warp.envs.cartpole_swing_up import CartPoleSwingUpEnv
from shac.envs.cartpole_swing_up import CartPoleSwingUpEnv as DFCartPoleSwingUpEnv
from warp.envs.hopper import HopperEnv
from shac.envs.hopper import HopperEnv as DFHopperEnv

Warp 0.7.2 initialized:
   CUDA Toolkit: 11.8, Driver: 11.8
   Devices:
     "cpu"    | x86_64
     "cuda:0" | NVIDIA GeForce RTX 3090 (sm_86)
   Kernel cache: /home/ksrini/.cache/warp/0.7.2
Using cached kernels


In [None]:
# Hopper environment batch (num_envs=2) with rendering turned on.
env = HopperEnv(render=True, num_envs=2)

In [None]:
# Rebinds `env` to a CartPole swing-up environment (num_envs=2) with rendering off.
env = CartPoleSwingUpEnv(num_envs=2, render=False)

In [None]:
# Benchmark: run HopperEnv at doubling env counts and plot the value returned by
# run() (stored as steps/second) against the number of environments.
NUM_DOUBLINGS = 15  # env counts swept: 2, 4, ..., 2**15

env_count = 2
env_times = []
env_size = []

for _ in range(NUM_DOUBLINGS):
    # A fresh environment is built for every size so each measurement is independent.
    robot = HopperEnv(num_envs=env_count, render=False, requires_grad=True)
    steps_per_second = robot.run()

    env_size.append(env_count)
    env_times.append(steps_per_second)

    env_count *= 2

# dump times
for size, rate in zip(env_size, env_times):
    print(f"envs: {size} steps/second: {rate}")

# clear=True wipes figure 1 if it already exists, so re-running this cell does
# not draw a second curve on top of the previous run's plot.
fig, ax = plt.subplots(num=1, clear=True)
ax.plot(env_size, env_times)
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel("Number of Envs")
ax.set_ylabel("Steps/Second")
ax.set_title("HopperEnv throughput vs. number of environments")
plt.show()

In [None]:
# Switch visualization on for the current `env`, then set up its renderer.
env.visualize = True
env.initialize_renderer()

In [None]:
# Reset the environment, then take a single step with one sampled action per env.
env.reset()
# Stack the samples into a single ndarray before handing them to torch:
# torch.tensor() on a Python list of ndarrays takes a slow element-wise path
# and emits a UserWarning.
# NOTE(review): assumes action_space.sample() returns a NumPy array — confirm.
sampled_actions = np.stack([env.action_space.sample() for _ in range(env.num_envs)])
ac = torch.tensor(sampled_actions, device=env.device)
# step() returns four values; presumably observation, reward, done and info.
obs, r, d, i = env.step(ac)

In [None]:
%time
# if env.render_mode == RenderMode.TINY:
#     writer = imageio.get_writer("test_render.mp4", fps=30)
# else:
#     writer = None
env.reset()
for _ in range(1000):
    ac = torch.tensor([env.action_space.sample()*0 for _ in range(env.num_envs)],
                      device=env.device)
    obs, r, d, i = env.step(ac)
    env.render()
    # img = env.render(mode="rgb_array")
    # if writer: writer.append_data(img)
    if d.sum() > 0: break
# if writer: writer.close()

In [None]:
# Rebinds `env` to a new Hopper environment (num_envs=2) with rendering enabled.
env = HopperEnv(num_envs=2, render=True)

In [None]:
# Reset the environment and keep whatever reset() returns as `obs`.
obs = env.reset()

In [None]:
# Advance one step with an all-zero action batch, then display the rendered frame.
zero_action = torch.zeros((env.num_envs, env.num_acts), device=env.device)
env.step(zero_action)
img = env.render(mode="rgb_array")

# Drop any previously opened figures before drawing the new image.
plt.close("all")
plt.imshow(img)

In [None]:
# Display `body_q` of the env's `state_0`, converted with .numpy().
env.state_0.body_q.numpy()

# Randomized Sampling

In [6]:
from shac.algorithms.mpc import Policy, Planner

In [8]:
# Rebinds `env` to the CartPole swing-up environment used by the planner
# below (num_envs=10, episode_length=30).
env = CartPoleSwingUpEnv(num_envs=10, episode_length=30)

Running with stochastic_init:  False




In [10]:
# Build the policy and a planner that pairs it with `env`.
p = Policy(
    num_actions=env.num_actions,
    horizon=0.5,
    max_steps=30,
)
plan = Planner(p, env)

# Run the planner's policy optimization.
plan.optimize_policy()

In [17]:
# Display the policy's `params` attribute.
p.params

array([[-0.053],
       [ 0.065],
       [ 0.101],
       [-0.066],
       [ 0.047],
       [ 0.174],
       [-0.067],
       [ 0.168],
       [-0.085],
       [ 0.002],
       [-0.001],
       [ 0.001],
       [-0.084],
       [-0.059],
       [-0.067],
       [ 0.033],
       [ 0.033],
       [ 0.223],
       [ 0.137],
       [-0.051],
       [ 0.032],
       [ 0.1  ],
       [ 0.003],
       [-0.007],
       [ 0.005],
       [ 0.087],
       [-0.085],
       [-0.033],
       [ 0.047],
       [ 0.031]])

In [19]:
# Single-env evaluation environment with rendering on, plus a planner that
# reuses the policy `p` against it.
eval_env = CartPoleSwingUpEnv(
    num_envs=1,
    episode_length=30,
    render=True,
    stage_path="test-rs-planner-cartpole",
)
eval_plan = Planner(p, eval_env)

Running with stochastic_init:  False
Initializing renderer writing to path: outputs/test-rs-planner-cartpole




In [22]:
# Roll out in the evaluation env with rendering enabled; the cell displays
# the value returned by rollout().
eval_plan.rollout(render=True)

tensor([-377.5608], device='cuda:0')

In [None]:
# Call the eval env's calculateObservations()
# (presumably refreshes its observation buffer — TODO confirm).
eval_env.calculateObservations()

In [27]:
# Display the eval env's `rew_buf` attribute.
eval_env.rew_buf

tensor([-40.0489], device='cuda:0')