# Archive buffer

Create the environment, wrap it with `Goalify`, and build the inverse model.

In [1]:
from lge.inverse_model import LinearInverseModel
from lge.go_explore import Goalify
import gym

# Keep a separate handle on the unwrapped environment so that its observation
# space is read before the Goalify wrapper is applied.
base_env = gym.make("MountainCar-v0")
obs_space = base_env.observation_space
env = Goalify(base_env)

# The inverse model is sized from the unwrapped observation space (first
# dimension of its shape) and from the number of actions of the wrapped env.
inverse_model = LinearInverseModel(
    obs_space.shape[0],
    action_size=env.action_space.n,
    latent_size=3,
)

Define the buffer.

In [2]:
from lge.archive import ArchiveBuffer

# Sizing of the archive. N_ENVS matches the leading axis (2) of the arrays
# that are fed to the buffer further down in this notebook.
BUFFER_SIZE = 100
N_ENVS = 2

# The spaces are taken from the Goalify-wrapped environment, which is also
# handed to the buffer together with the inverse model.
archive = ArchiveBuffer(
    buffer_size=BUFFER_SIZE,
    observation_space=env.observation_space,
    action_space=env.action_space,
    env=env,
    inverse_model=inverse_model,
    n_envs=N_ENVS,
)

Feed the buffer.

In [3]:
import numpy as np

# Two hand-written trajectories of seven 2-D observations each.
# Axis 0 indexes the trajectory, axis 1 the time step.
path_first = [[0, 0], [0, 1], [0, 2], [0, 3], [0, 4], [0, 5], [0, 6]]
path_second = [[0, 0], [1, 0], [1, 1], [1, 2], [1, 3], [0, 3], [0, 4]]
trajectories = np.array([path_first, path_second])

# Every goal is the origin, i.e. an all-zero array with the same shape and
# dtype as the trajectories.
goals = np.zeros_like(trajectories)

# One dict per time step, each holding that step's slice across both
# trajectories.
observations = [
    dict(observation=trajectories[:, step], goal=goals[:, step])
    for step in range(trajectories.shape[1])
]

In [4]:
# Feed consecutive (obs, next_obs) pairs: the 7 observations give 6 transitions.
last_transition = len(observations) - 2
for i, (current_obs, following_obs) in enumerate(zip(observations[:-1], observations[1:])):
    # Both environments are flagged as done only on the final transition.
    is_last = i == last_transition
    archive.add(
        obs=current_obs,
        next_obs=following_obs,
        action=np.array([[0], [0]]),
        reward=np.array([0, 0]),
        done=np.ones(2) * is_last,
        infos=[{}, {}],
    )

In [5]:
# NOTE(review): presumably refreshes the embeddings of the stored observations
# with the current inverse model before sampling — confirm against ArchiveBuffer.
archive.recompute_embeddings()

Try the `sample_trajectory` method.

In [6]:
# The recorded output is a pair of float32 arrays: a (5, 2) block of
# observations and a (5, 3) block, presumably the matching latent embeddings
# (3 columns, like latent_size=3) — TODO confirm.
archive.sample_trajectory()

(array([[0., 1.],
        [0., 2.],
        [0., 3.],
        [0., 4.],
        [0., 5.]], dtype=float32),
 array([[-0.9225087 ,  0.2057623 , -0.17049775],
        [-1.4706639 ,  0.3912635 , -0.16262755],
        [-2.0073988 ,  0.58207   , -0.14170998],
        [-2.5441337 ,  0.7728765 , -0.12079245],
        [-3.0808685 ,  0.963683  , -0.09987494]], dtype=float32))

Here is the set of possible trajectories.

In [7]:
# Observations that follow the shared start [0, 0], one list per fed trajectory.
path_env0 = [[0, 1], [0, 2], [0, 3], [0, 4], [0, 5], [0, 6]]
path_env1 = [[1, 0], [1, 1], [1, 2], [1, 3], [0, 3], [0, 4]]

# Every non-empty prefix of either path is a candidate. Points are copied so
# that no inner list is shared between candidates.
possible_trajectories = [
    [list(point) for point in path[:length]]
    for path in (path_env0, path_env1)
    for length in range(1, len(path) + 1)
]

Check that all possible trajectories are sampled.
Also check that all sampled trajectories are possible.

In [8]:
# Draw many samples so that the coverage check below does not fail by chance:
# with only 50 draws over 12 candidates, at least one candidate would be
# missed in roughly 15% of runs even if sampling were uniform.
n_samples = 500
sampled_trajectories = [
    archive.sample_trajectory()[0].astype(int).tolist()  # nested lists are convenient to compare
    for _ in range(n_samples)
]

# Every sampled trajectory must be one of the candidates...
unexpected = [trajectory for trajectory in sampled_trajectories if trajectory not in possible_trajectories]
assert not unexpected, f"sampled trajectories that are not possible: {unexpected}"

# ...and every candidate must have been sampled at least once.
never_sampled = [trajectory for trajectory in possible_trajectories if trajectory not in sampled_trajectories]
assert not never_sampled, f"possible trajectories that were never sampled: {never_sampled}"

[True, True, True, True, True, True, True, True, True, True, True, True]

Do the same with an observation step of 2 (`step=2`).

In [10]:
# Expected results for step=2: each candidate is one of the fed trajectories,
# cut at some observation and thinned to every second observation counting
# back from that final observation.
possible_trajectories = [
    [[0, 1]],
    [[0, 2]],
    [[0, 1], [0, 3]],
    [[0, 2], [0, 4]],
    [[0, 1], [0, 3], [0, 5]],
    [[0, 2], [0, 4], [0, 6]],
    [[1, 0]],
    [[1, 1]],
    [[1, 0], [1, 2]],
    [[1, 1], [1, 3]],
    [[1, 0], [1, 2], [0, 3]],
    [[1, 1], [1, 3], [0, 4]],
]

# Draw many samples so that the coverage check below does not fail by chance:
# with only 50 draws over 12 candidates, at least one candidate would be
# missed in roughly 15% of runs even if sampling were uniform.
n_samples = 500
sampled_trajectories = [
    archive.sample_trajectory(step=2)[0].astype(int).tolist()  # nested lists are convenient to compare
    for _ in range(n_samples)
]

# Every sampled trajectory must be one of the candidates...
unexpected = [trajectory for trajectory in sampled_trajectories if trajectory not in possible_trajectories]
assert not unexpected, f"sampled trajectories that are not possible: {unexpected}"

# ...and every candidate must have been sampled at least once.
never_sampled = [trajectory for trajectory in possible_trajectories if trajectory not in sampled_trajectories]
assert not never_sampled, f"possible trajectories that were never sampled: {never_sampled}"