In [3]:
%pip install --upgrade mani_skill tyro diffusers sk-video

Collecting tyro
  Downloading tyro-0.8.5-py3-none-any.whl.metadata (8.2 kB)
Collecting diffusers
  Downloading diffusers-0.29.2-py3-none-any.whl.metadata (19 kB)
Collecting sk-video
  Downloading sk_video-1.1.10-py2.py3-none-any.whl.metadata (1.0 kB)
Collecting docstring-parser>=0.16 (from tyro)
  Downloading docstring_parser-0.16-py3-none-any.whl.metadata (3.0 kB)
Collecting rich>=11.1.0 (from tyro)
  Downloading rich-13.7.1-py3-none-any.whl.metadata (18 kB)
Collecting shtab>=1.5.6 (from tyro)
  Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)
Collecting importlib-metadata (from diffusers)
  Downloading importlib_metadata-8.0.0-py3-none-any.whl.metadata (4.6 kB)
Collecting regex!=2019.12.17 (from diffusers)
  Downloading regex-2024.5.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.9/40.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from diff

In [8]:
from typing import Tuple, Sequence, Dict, Union, Optional, Callable
import numpy as np
import math
import os

from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
from diffusers.training_utils import EMAModel
from diffusers.optimization import get_scheduler
from tqdm.auto import tqdm

import h5py

import gymnasium as gym

from mani_skill.utils.io_utils import load_json
import mani_skill.envs
from mani_skill.trajectory.dataset import ManiSkillTrajectoryDataset

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import cv2
from skvideo.io import vwrite
from IPython.display import Video


In [10]:
#!python -m mani_skill.utils.download_demo all

All demonstrations will be downloaded. This may take a while.
Downloading demonstrations to /home/jangruhnert/.maniskill/demos - 1/5, PickCube-v1
19.1Mit [00:02, 8.72Mit/s]                                                      
Downloading demonstrations to /home/jangruhnert/.maniskill/demos - 2/5, PushCube-v1
9.27Mit [00:01, 5.45Mit/s]                                                      
Downloading demonstrations to /home/jangruhnert/.maniskill/demos - 3/5, StackCube-v1
16.8Mit [00:01, 9.22Mit/s]                                                      
Downloading demonstrations to /home/jangruhnert/.maniskill/demos - 4/5, PegInsertionSide-v1
35.6Mit [00:03, 11.0Mit/s]                                                      
Downloading demonstrations to /home/jangruhnert/.maniskill/demos - 5/5, PlugCharger-v1
24.3Mit [00:02, 8.95Mit/s]                                                      


In [14]:
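# Replay demonstrations. The commented-out command below is an earlier variant
# (target control mode pd_joint_pos, state observations); the active command
# replays the local trajectory with pointcloud observations on the CPU backend.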
#!python -m mani_skill.trajectory.replay_trajectory \
#  --traj-path /content/drive/MyDrive/Data/Training/demos/PickCube-v1/rl/trajectory.h5 \
#  --save-traj --target-control-mode pd_joint_pos \
#  --obs-mode state --num-procs 10

!python -m mani_skill.trajectory.replay_trajectory \
  --traj-path ./data/PickCube-v1/rl/trajectory.h5 \
  --save-traj \
  --obs-mode pointcloud \
  --sim-backend cpu

0step [00:00, ?step/s]Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/home/jangruhnert/Documents/GitHub/uni-dll-nr2/myenv/lib64/python3.12/site-packages/mani_skill/trajectory/replay_trajectory.py", line 610, in <module>
    main(parse_args())
  File "/home/jangruhnert/Documents/GitHub/uni-dll-nr2/myenv/lib64/python3.12/site-packages/mani_skill/trajectory/replay_trajectory.py", line 604, in main
    _main(args)
  File "/home/jangruhnert/Documents/GitHub/uni-dll-nr2/myenv/lib64/python3.12/site-packages/mani_skill/trajectory/replay_trajectory.py", line 415, in _main
    env = gym.make(env_id, **env_kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/jangruhnert/Documents/GitHub/uni-dll-nr2/myenv/lib64/python3.12/site-packages/gymnasium/envs/registration.py", line 802, in make
    env = env_creator(**env_spec_kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/jangruhn

In [None]:
from typing import Union

import h5py
import numpy as np
from torch.utils.data import Dataset
from tqdm import tqdm

from mani_skill.utils import common
from mani_skill.utils.io_utils import load_json


# reads a whole h5 group into memory so later access does not hit the file
def load_h5_data(data):
    """Recursively copy an h5py group into nested dicts of in-memory values.

    Args:
        data: a mapping-like h5py object exposing ``keys()`` and item access.

    Returns:
        dict: same key structure as ``data``. Entries that are
        ``h5py.Dataset`` instances are read fully via ``[:]``; every other
        entry is loaded recursively with this function.
    """
    def _materialise(node):
        # Datasets are leaves; anything else is treated as a nested group.
        if isinstance(node, h5py.Dataset):
            return node[:]
        return load_h5_data(node)

    return {key: _materialise(data[key]) for key in data.keys()}

def create_sample_indices(episode_ends: np.ndarray, sequence_length: int, pad_before: int = 0, pad_after: int = 0):
    """Enumerate fixed-length sample windows that never cross an episode boundary.

    Args:
        episode_ends: 1-D array of per-step flags over the concatenated buffer;
            a truthy entry marks the LAST step of an episode.
        sequence_length: number of steps in every produced window.
        pad_before: how many steps a window may start before the episode start
            (those positions are later filled by repeating the first step).
        pad_after: how many steps a window may extend past the episode end
            (those positions are later filled by repeating the last step).

    Returns:
        np.ndarray of shape ``(num_windows, 4)`` and dtype int64. Each row is
        ``[buffer_start_idx, buffer_end_idx, sample_start_idx, sample_end_idx]``:
        ``buffer[buffer_start_idx:buffer_end_idx]`` is the real data and it is
        placed at ``window[sample_start_idx:sample_end_idx]``.

    Notes:
        Steps after the last truthy flag do not belong to a finished episode
        and produce no windows. An input without any truthy flag yields an
        empty ``(0, 4)`` array.
    """
    indices = []
    # Buffer index of the first step of the episode currently being processed.
    start_idx = 0
    for end_step in np.flatnonzero(np.asarray(episode_ends)):
        end_step = int(end_step)
        # The flagged step is part of the episode, hence the +1.
        episode_length = end_step - start_idx + 1
        min_start = -pad_before
        max_start = episode_length - sequence_length + pad_after

        # idx is the window start relative to the episode start (may be negative).
        for idx in range(min_start, max_start + 1):
            buffer_start_idx = max(idx, 0) + start_idx
            buffer_end_idx = min(idx + sequence_length, episode_length) + start_idx
            # How many window positions fall outside the episode on each side.
            start_offset = buffer_start_idx - (idx + start_idx)
            end_offset = (idx + sequence_length + start_idx) - buffer_end_idx
            sample_start_idx = 0 + start_offset
            sample_end_idx = sequence_length - end_offset
            indices.append([buffer_start_idx, buffer_end_idx, sample_start_idx, sample_end_idx])

        # The next episode begins right after the flagged step.
        start_idx = end_step + 1

    # reshape keeps a consistent (N, 4) layout even when no window was produced
    return np.asarray(indices, dtype=np.int64).reshape(-1, 4)


def sample_sequence(train_data, sequence_length, buffer_start_idx, buffer_end_idx, sample_start_idx, sample_end_idx):
    """Cut one window out of ``train_data`` and edge-pad it to ``sequence_length``.

    ``train_data[buffer_start_idx:buffer_end_idx]`` is written to
    ``window[sample_start_idx:sample_end_idx]``. Positions before
    ``sample_start_idx`` repeat the first sliced element and positions from
    ``sample_end_idx`` on repeat the last sliced element.

    Returns:
        The window passed through ``np.vstack``, so a 1-D ``train_data``
        yields an array of shape ``(1, sequence_length)``.
    """
    chunk = train_data[buffer_start_idx:buffer_end_idx]
    window = np.zeros((sequence_length, *train_data.shape[1:]), dtype=train_data.dtype)

    needs_front_pad = sample_start_idx > 0
    needs_back_pad = sample_end_idx < sequence_length
    if needs_front_pad:
        # repeat the first real element in front of the data
        window[:sample_start_idx] = chunk[0]
    if needs_back_pad:
        # repeat the last real element behind the data
        window[sample_end_idx:] = chunk[-1]
    window[sample_start_idx:sample_end_idx] = chunk

    return np.vstack([window])

# normalize data
def get_data_stats(data):
    """Return the per-feature minimum and maximum of ``data``.

    All leading axes are flattened so that only the last axis (the feature
    axis) remains; the extrema are then taken over the flattened rows.

    Returns:
        dict with keys ``'min'`` and ``'max'``, each holding one value per
        entry of the last axis.
    """
    feature_dim = data.shape[-1]
    rows = data.reshape(-1, feature_dim)
    lowest = np.min(rows, axis=0)
    highest = np.max(rows, axis=0)
    return {'min': lowest, 'max': highest}

def normalize_data(obs, terminated):
    """Min-max normalize observations to ``[0, 1]`` separately for every episode.

    Args:
        obs: numpy array whose first axis indexes the steps of all episodes
            concatenated. Dict observations are not supported here.
        terminated: 1-D array of per-step flags; a truthy entry marks the last
            step of an episode (that step is normalized with its episode).

    Returns:
        The normalized array. A floating-point ``obs`` is modified in place and
        returned; any other dtype is first copied to ``float32`` so the
        fractional results are not truncated, and the input is left untouched.

    Notes:
        * Features that are constant within an episode map to 0 instead of
          producing NaN from a division by zero.
        * Steps after the last truthy flag are left unchanged.
    """
    if not np.issubdtype(obs.dtype, np.floating):
        # Writing floats into an integer array would silently truncate them.
        obs = obs.astype(np.float32)

    episode_start = 0
    for end_step in np.flatnonzero(np.asarray(terminated)):
        stop = int(end_step) + 1  # exclusive end; includes the terminal step
        episode = obs[episode_start:stop]
        low = episode.min(axis=0)
        span = episode.max(axis=0) - low
        # Avoid 0/0 for features that never change during the episode.
        span = np.where(span == 0, 1, span)
        obs[episode_start:stop] = (episode - low) / span
        episode_start = stop
    return obs

class CustomManiSkillTrajectoryDataset(Dataset):
    """
    A torch Dataset over trajectory .h5 data generated from ManiSkill that serves
    windowed samples: each item is one window of observations (length
    ``pred_horizon``, edge-padded inside its episode) together with a slice of actions.

    Args:
        dataset_file (str): path to the .h5 file containing the data you want to load.
            The matching metadata is read from the same path with ".h5" replaced by ".json"
        pred_horizon (int): length of every sampled observation window
        obs_horizon (int): windows may start up to ``obs_horizon - 1`` steps before an episode start
        action_horizon (int): windows may extend up to ``action_horizon - 1`` steps past an episode end
        load_count (int): the number of trajectories from the dataset to load into memory. If -1, will load all into memory
        success_only (bool): whether to skip trajectories that are not successful in the end. Default is false
        normalize (bool): whether to run ``normalize_data`` on the observations. Default is false
        device: The location to save data to. If None will store as numpy (the default), otherwise will move data to that device
    """

    def __init__(
        self, dataset_file: str, pred_horizon: int, obs_horizon: int, action_horizon:int, load_count=-1, success_only: bool = False, normalize: bool = False, device=None
    ) -> None:
        self.dataset_file = dataset_file
        self.pred_horizon = pred_horizon
        self.obs_horizon = obs_horizon
        self.action_horizon = action_horizon
        self.normalize = normalize
        self.device = device
        self.data = h5py.File(dataset_file, "r")
        json_path = dataset_file.replace(".h5", ".json")
        self.json_data = load_json(json_path)
        self.episodes = self.json_data["episodes"]
        self.env_info = self.json_data["env_info"]
        self.env_id = self.env_info["env_id"]
        self.env_kwargs = self.env_info["env_kwargs"]

        self.obs = None
        self.actions = []
        self.terminated = []
        self.truncated = []
        self.success, self.fail, self.rewards = None, None, None
        if load_count == -1:
            load_count = len(self.episodes)
        for eps_id in tqdm(range(load_count)):
            eps = self.episodes[eps_id]
            if success_only:
                assert (
                    "success" in eps
                ), "episodes in this dataset do not have the success attribute, cannot load dataset with success_only=True"
                if not eps["success"]:
                    continue
            trajectory = self.data[f"traj_{eps['episode_id']}"]
            trajectory = load_h5_data(trajectory)
            eps_len = len(trajectory["actions"])

            # exclude the final observation as most learning workflows do not use it
            obs = common.index_dict_array(trajectory["obs"], slice(eps_len))
            # Test for "nothing loaded yet" instead of eps_id == 0: with
            # success_only=True the first episode may have been skipped.
            if self.obs is None:
                self.obs = obs
            else:
                self.obs = common.append_dict_array(self.obs, obs)

            self.actions.append(trajectory["actions"])
            self.terminated.append(trajectory["terminated"])
            self.truncated.append(trajectory["truncated"])

            # handle data that might optionally be in the trajectory
            if "rewards" in trajectory:
                if self.rewards is None:
                    self.rewards = [trajectory["rewards"]]
                else:
                    self.rewards.append(trajectory["rewards"])
            if "success" in trajectory:
                if self.success is None:
                    self.success = [trajectory["success"]]
                else:
                    self.success.append(trajectory["success"])
            if "fail" in trajectory:
                if self.fail is None:
                    self.fail = [trajectory["fail"]]
                else:
                    self.fail.append(trajectory["fail"])

        # Flatten the per-episode lists into single step-indexed arrays.
        self.actions = np.vstack(self.actions)
        self.terminated = np.concatenate(self.terminated)
        self.truncated = np.concatenate(self.truncated)

        if self.rewards is not None:
            self.rewards = np.concatenate(self.rewards)
        if self.success is not None:
            self.success = np.concatenate(self.success)
        if self.fail is not None:
            self.fail = np.concatenate(self.fail)

        def remove_np_uint16(x: Union[np.ndarray, dict]):
            """Cast uint16 arrays (recursively through dicts) to int32."""
            if isinstance(x, dict):
                for k in x.keys():
                    x[k] = remove_np_uint16(x[k])
                return x
            else:
                if x.dtype == np.uint16:
                    return x.astype(np.int32)
                return x

        # uint16 dtype is used to conserve disk space and memory
        # you can optimize this dataset code to keep it as uint16 and process that
        # dtype of data yourself. for simplicity we simply cast to a int32 so
        # it can automatically be converted to torch tensors without complaint
        self.obs = remove_np_uint16(self.obs)

        # Pre-compute every sample window; `terminated` marks the episode ends.
        self.indices = create_sample_indices(
            episode_ends=self.terminated,
            sequence_length=self.pred_horizon,
            pad_before=self.obs_horizon - 1,
            pad_after=self.action_horizon - 1
        )

        # per-episode min-max normalization of the observations (see normalize_data)
        if self.normalize:
            normalized = normalize_data(self.obs, self.terminated)
            # Keep the current observations if the helper only works in place
            # and returns nothing; never replace them with None.
            if normalized is not None:
                self.obs = normalized

        if device is not None:
            self.actions = common.to_tensor(self.actions, device=device)
            self.obs = common.to_tensor(self.obs, device=device)
            self.terminated = common.to_tensor(self.terminated, device=device)
            self.truncated = common.to_tensor(self.truncated, device=device)
            if self.rewards is not None:
                self.rewards = common.to_tensor(self.rewards, device=device)
            # success/fail must be converted from their own arrays, not from
            # terminated/truncated.
            if self.success is not None:
                self.success = common.to_tensor(self.success, device=device)
            if self.fail is not None:
                self.fail = common.to_tensor(self.fail, device=device)

    def __len__(self):
        # __getitem__ indexes into the pre-computed windows, so the dataset
        # length is the number of windows, not the number of steps.
        return len(self.indices)

    def __getitem__(self, idx):
        """Return the idx-th window as a dict with obs, action and per-step flags."""
        buffer_start_idx, buffer_end_idx, sample_start_idx, sample_end_idx = self.indices[idx]
        # NOTE(review): when `device` was given, self.obs / self.actions are
        # already tensors here; the numpy-based windowing and padding below
        # presumably only work for the device=None case — confirm.
        obs = sample_sequence(
            train_data=self.obs,
            sequence_length=self.pred_horizon,
            buffer_start_idx=buffer_start_idx,
            buffer_end_idx=buffer_end_idx,
            sample_start_idx=sample_start_idx,
            sample_end_idx=sample_end_idx
        )

        # Sample actions directly (assuming they don't need normalization)
        # NOTE(review): this slice has a variable length (it depends on
        # buffer_end_idx - buffer_start_idx) and is not limited to the episode
        # of the window; verify this is the intended action target.
        action = self.actions[buffer_start_idx + self.obs_horizon - 1 : buffer_end_idx + self.obs_horizon - 1 + self.action_horizon]

        # If sequence is shorter than action_horizon, pad it
        if len(action) < self.action_horizon:
            padding = np.zeros((self.action_horizon - len(action),) + action.shape[1:], dtype=action.dtype)
            action = np.concatenate([action, padding])

        action = common.to_tensor(action, device=self.device)
        obs = common.to_tensor(obs, device=self.device)

        # NOTE(review): the flags below are looked up with the window index
        # `idx`, while the arrays are indexed per step — confirm which step of
        # the window they are meant to describe.
        res = dict(
            obs=obs,
            action=action,
            terminated=self.terminated[idx],
            truncated=self.truncated[idx],
        )
        if self.rewards is not None:
            res.update(reward=self.rewards[idx])
        if self.success is not None:
            res.update(success=self.success[idx])
        if self.fail is not None:
            res.update(fail=self.fail[idx])
        return res

In [None]:
# ManiSkill dataset: configuration and construction
# Path to the trajectory .h5 file to load.
dataset_file = '/content/drive/MyDrive/Data/Training/demos/PickCube-v1/rl/trajectory.state.pd_joint_delta_pos.h5'
%ls /content/drive/MyDrive/Data/Training/demos/PickCube-v1/rl

# Number of episodes to read from the dataset.
load_count = 10
# Passed positionally below as the dataset's `success_only` argument.
succes_only = True
normalize = False # Normalization not working yet
# NOTE(review): `device` is defined here but is not passed to the dataset
# constructor below, so the dataset keeps its default device=None.
device = torch.device('cuda')

pred_horizon = 16
obs_horizon = 2
action_horizon = 8
#|o|o|                             observations: 2
#| |a|a|a|a|a|a|a|a|               actions executed: 8
#|p|p|p|p|p|p|p|p|p|p|p|p|p|p|p|p| actions predicted: 16


# create dataset from file
dataset = CustomManiSkillTrajectoryDataset(dataset_file, pred_horizon, obs_horizon, action_horizon, load_count, succes_only, normalize)

