In [1]:
import numpy as np
import minari
import gymnasium as gym

  from .autonotebook import tqdm as notebook_tqdm


## Investigate Remote Dataset ##

First, load a dataset.

In [2]:
dataset_door = minari.load_dataset('door-human-v2')

Check different Attributes.

In [3]:
print("Dataset id: ", dataset_door._dataset_id)
print("Total Steps: ", dataset_door.total_steps)
print("Total Episodes: ", dataset_door.total_episodes)
print("Indices of available episodes: ", dataset_door.episode_indices)

Dataset id:  door-human-v2
Total Steps:  6729
Total Episodes:  25
Indices of available episodes:  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]


Sample one episode.

In [4]:
episode_data = dataset_door.sample_episodes(1)[0]

### Investigate EpisodeData ###

In [5]:
episode_data.observations.shape

(266, 39)

In [6]:
episode_data.terminations

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [7]:
episode_data.truncations

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [8]:
episode_data.total_timesteps

265

## Investigate how to create dataset from offline buffers ##

A $\bf{buffer}$ is essentially a list of episode data. Each episode data should be a dictionary. For example, episode_data['action'] should be the array of actions taken during the episode.

Let's make up some data. Suppose we have 3 episodes. First episode has 10 time steps, second has 20, third has 30. Suppose we have 3-dim observation, 2-dim action.

In [9]:
ep1_observations = np.random.rand(10 + 1, 3)
ep2_observations = np.random.rand(20 + 1, 3)
ep3_observations = np.random.rand(30 + 1, 3)

ep1_actions= np.random.rand(10, 2)
ep2_actions = np.random.rand(20, 2)
ep3_actions = np.random.rand(30, 2)


ep1_rewrads = np.random.rand(10, 1)
ep2_rewrads = np.random.rand(20, 1)
ep3_rewrads = np.random.rand(30, 1)

ep1_terminations = np.asarray([False for _ in range(10)])
ep2_terminations = np.asarray([False for _ in range(20)])
ep3_terminations = np.asarray([False for _ in range(30)])


ep1_truncations= np.asarray([False for _ in range(10 + 1)])
ep1_truncations[-1] = True
ep2_truncations = np.asarray([False for _ in range(20 + 1)])
ep2_truncations[-1] = True
ep3_truncations = np.asarray([False for _ in range(30 + 1)])
ep3_truncations[-1] = True

In [10]:
ep1 = {'observations': ep1_observations, 'actions': ep1_actions, 'rewards': ep1_rewrads, 'terminations': ep1_terminations, 'truncations': ep1_truncations}
ep2 = {'observations': ep2_observations, 'actions': ep2_actions, 'rewards': ep2_rewrads, 'terminations': ep2_terminations, 'truncations': ep2_truncations}
ep3 = {'observations': ep3_observations, 'actions': ep3_actions, 'rewards': ep3_rewrads, 'terminations': ep3_terminations, 'truncations': ep3_truncations}

In [11]:
dataset =  minari.create_dataset_from_buffers("random-dataset-v0", buffer=[ep1, ep2, ep3], 
                                              action_space=gym.spaces.Box(low=0.0, high=1.0, shape=(2, 1)), 
                                              observation_space=gym.spaces.Box(low=0.0, high=1.0, shape=(3, 1))
                                              )



In [13]:
print("Dataset id: ", dataset._dataset_id)
print("Total Steps: ", dataset.total_steps)
print("Total Episodes: ", dataset.total_episodes)
print("Indices of available episodes: ", dataset.episode_indices)

Dataset id:  random-dataset-v0
Total Steps:  60
Total Episodes:  3
Indices of available episodes:  [0 1 2]


In [17]:
rand_episode = dataset.sample_episodes(1)[0]

In [19]:
rand_episode.observations.shape

(11, 3)

In [20]:
rand_episode.terminations

array([False, False, False, False, False, False, False, False, False,
       False])

In [21]:
rand_episode.truncations

array([False, False, False, False, False, False, False, False, False,
       False,  True])

In [22]:
rand_episode.total_timesteps

10