In [27]:
import numpy as np
import gym

In [None]:
class ReplayBuffer:
    """
    A simple FIFO experience replay buffer for SAC agents.

    Transitions are held in preallocated float32 NumPy arrays. Once
    ``size`` transitions have been stored, each new transition overwrites
    the oldest one.
    """

    def __init__(self, obs_dim, act_dim, size):
        """
        Allocate storage for up to ``size`` transitions.

        Args:
            obs_dim: Observation dimensionality, either an int (flat
                vector) or a shape sequence such as ``(H, W, C)``.
            act_dim: Action dimensionality, int or shape sequence.
            size: Maximum number of transitions kept in the buffer.
        """
        self.obs1_buf = np.zeros(self._buf_shape(size, obs_dim), dtype=np.float32)
        self.obs2_buf = np.zeros(self._buf_shape(size, obs_dim), dtype=np.float32)
        self.acts_buf = np.zeros(self._buf_shape(size, act_dim), dtype=np.float32)
        self.rews_buf = np.zeros(size, dtype=np.float32)
        self.done_buf = np.zeros(size, dtype=np.float32)
        # ptr: next write slot; size: number of valid entries so far.
        self.ptr, self.size, self.max_size = 0, 0, size

    @staticmethod
    def _buf_shape(length, dim):
        """Return the array shape for ``length`` entries of per-entry shape ``dim``."""
        if np.isscalar(dim):
            return (length, dim)
        return (length, *dim)

    def store(self, obs, act, rew, next_obs, done):
        """
        Write one transition at the current write position.

        Values are cast to float32 on assignment. The write position then
        advances and wraps around at ``max_size``, so a full buffer
        overwrites its oldest entry.
        """
        self.obs1_buf[self.ptr] = obs
        self.obs2_buf[self.ptr] = next_obs
        self.acts_buf[self.ptr] = act
        self.rews_buf[self.ptr] = rew
        self.done_buf[self.ptr] = done
        self.ptr = (self.ptr+1) % self.max_size
        self.size = min(self.size+1, self.max_size)

    def sample_batch(self, batch_size=32):
        """
        Draw ``batch_size`` stored transitions uniformly at random.

        Indices are drawn independently, so the same transition may appear
        more than once in a batch.

        Returns:
            dict with keys ``obs1``, ``obs2``, ``acts``, ``rews`` and
            ``done``, each an array whose first axis has length
            ``batch_size``.

        Raises:
            ValueError: If nothing has been stored yet.
        """
        if self.size == 0:
            # np.random.randint(0, 0) would fail with an opaque range error.
            raise ValueError("Cannot sample from an empty replay buffer.")
        idxs = np.random.randint(0, self.size, size=batch_size)
        return dict(obs1=self.obs1_buf[idxs],
                    obs2=self.obs2_buf[idxs],
                    acts=self.acts_buf[idxs],
                    rews=self.rews_buf[idxs],
                    done=self.done_buf[idxs])
    

# Fill a replay buffer with expert demonstration transitions loaded from disk.

# Capacity of the expert buffer. The buffer is FIFO, so if the demonstrations
# hold more transitions than this, only the last `exp_data_size` are retained.
exp_data_size = 1000
env_name = 'HalfCheetah-v2'

# Each demo is indexed by string key below, so the file holds Python objects
# and NumPy >= 1.16.3 refuses to load it unless pickling is explicitly allowed.
# NOTE: unpickling executes arbitrary code; only load demo files you trust.
demo_list = np.load('exp_'+env_name+'.npy', allow_pickle=True)

# The environment is created to read the observation/action dimensionality.
env = gym.make(env_name)
obs_dim = env.observation_space.high.shape[0]
act_dim = env.action_space.high.shape[0]
exp_replay_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim, size=exp_data_size)

for demo in demo_list:
    # NOTE(review): assumes demo['obs'] has one more entry than demo['rew']
    # (it includes the final observation); otherwise obs[t+1] raises
    # IndexError on the last step. Confirm against the demo recorder.
    for t in range(len(demo['rew'])):
        o = demo['obs'][t]
        a = demo['act'][t]
        r = demo['rew'][t]
        d = demo['done'][t]
        o2 = demo['obs'][t+1]
        exp_replay_buffer.store(o, a, r, o2, d)