In [1]:
#hide
#skip
# Turn off Jedi-based tab completion for this IPython session.
%config Completer.use_jedi = False
# upgrade fastrl on colab
# The `[ -e /content ]` test gates everything chained after it with `&&`, so the install
# only runs where `/content` exists; apt-get output is discarded via `> /dev/null 2>&1`.
! [ -e /content ] && pip install -Uqq fastrl['dev'] pyvirtualdisplay && \
                     apt-get install -y xvfb python-opengl > /dev/null 2>&1 
# NOTE: IF YOU SEE VERSION ERRORS, IT IS SAFE TO IGNORE THEM. COLAB IS BEHIND IN SOME OF THE PACKAGE VERSIONS

In [2]:
# hide
from fastcore.imports import in_colab
# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbverbose.showdoc import *
    from nbdev.imports import *
    # NOTE(review): `os`, `IN_NOTEBOOK`, `IN_COLAB` and `IN_IPYTHON` are not imported explicitly
    # in this cell; presumably they are provided by the star imports above -- confirm.
    if not os.environ.get("IN_TEST", None):
        # Without the IN_TEST env var we expect to be running in a local (non-colab) notebook
        assert IN_NOTEBOOK
        assert not IN_COLAB
        assert IN_IPYTHON
else:
    # A virtual display is needed on colab
    from pyvirtualdisplay import Display
    # Bound to `virtual_display` rather than `display` so that IPython's built-in
    # `display()` helper is not shadowed for the rest of the notebook.
    virtual_display = Display(visible=0, size=(400, 300))
    virtual_display.start()

In [3]:
# default_exp memory.experience_replay

In [4]:
# export
# Python native modules
import os
from typing import *
# Third party libs
from fastcore.all import *
from fastai.learner import *
from fastai.torch_basics import *
from fastai.torch_core import *
from fastai.callback.all import *
# Local modules
from fastrl.core import *
from fastrl.callback.core import *
from fastrl.data.block import *

  return torch._C._cuda_getDeviceCount() > 0


# Experience Replay
> Experience Replay is likely the simplest form of memory used by RL agents. 

In [5]:
# export
class ExperienceReplayException(Exception):
    "Raised by `ExperienceReplay` when it is combined with an unsupported operand"

class ExperienceReplay(object):
    def __init__(self,
                 bs=16,         # Number of entries to query from memory
                 max_sz=200,    # Maximum number of entries to hold. Will start overwriting after.
                 warmup_sz=100  # Minimum number of entries needed to continue with a batch
                 ):
        "Stores `BD`s in a rotating list `self.memory`"
        store_attr()
        # Enforces `warmup_sz<=max_sz`; otherwise the warmup check in `sample` could never pass
        test_lt(warmup_sz-1,max_sz)
        self.memory=None  # Created by the first `+`
        self.pointer=0    # Next write position: a count of entries written in the current pass, not an index

    def __add__(self,other:BD):
        """In-place add `other` to memory, overwriting if len(self.memory)>self.max_sz

        `other` may be a `BD`, a `dict` (wrapped in a `BD`) or a 1-element tuple holding the record.
        Any other tuple raises `ExperienceReplayException`. Batches that do not fit between
        `self.pointer` and `self.max_sz` are split: the head fills the tail of the memory and the
        remainder is added recursively starting again at position 0.
        Returns `self`, so additions can be chained (`er+a+b`).
        """
        if isinstance(other,tuple) and len(other)==1: other=other[0]
        elif isinstance(other,tuple):                 raise ExperienceReplayException('records need to be `BD`s or 1 element tuples')
        if isinstance(other,dict): other=BD(other)

        if self.memory is None:
            if other.bs()>self.max_sz:
                self.memory=other[:self.max_sz]
                self.pointer=0           # Keep the pointer 0 since we have basically replaced the memory
                self+other[self.max_sz:] # Recursively add the rest of the batch
            else:
                self.memory=other
                self.pointer=self.memory.bs() # remember that pointer is not an index but number of elements
        else:
            # Number of entries of `other` that would land past the end of the memory
            n_over=(other.bs()+self.pointer)-self.max_sz
            if n_over>0: # e.g.: max_sz 200, pointer 195, other is 5.
                # Fill up to `max_sz` with the head of `other`...
                self.memory=self.memory[:self.pointer]+other[:-n_over]
                # ...then wrap around and add the remaining `n_over` entries from the start
                self.pointer=0
                self+other[other.bs()-n_over:]
            else:
                # If the number of elements is not over
                next_pointer=self.pointer+other.bs()
                # Keep what is before the pointer, insert `other`, keep whatever is stored after it
                self.memory=self.memory[:self.pointer]+other+self.memory[next_pointer:]
                self.pointer=next_pointer
        return self

    def __radd__(self,other:BD): raise ExperienceReplayException('You can only do experience_reply+[some other element]')

    def __len__(self): return self.memory.bs() if self.memory is not None else 0

    def sample(self)->BD:
        """Returns a sample of size `self.bs`

        Indices are drawn with `np.random.randint`, i.e. with replacement.
        Raises `CancelBatchException` while fewer than `self.warmup_sz` entries are stored
        (including when nothing has been added yet).
        """
        n_stored=len(self)
        # Check the warmup first: this covers `self.memory is None` and avoids drawing indices
        # and applying `to_device` for a batch that is going to be cancelled anyway.
        if n_stored==0 or n_stored<self.warmup_sz: raise CancelBatchException
        with torch.no_grad():
            idxs=np.random.randint(0,n_stored,self.bs).tolist()
            samples=self.memory[idxs].mapv(to_device)
        return samples

Let's generate some batches to test with...

In [7]:
from fastrl.data.gym import *
# Build the gym source once (the original cell constructed an identical `Source` twice).
source=Source(cbs=[GymLoop(env_name='CartPole-v1',steps_delta=1,steps_count=1,seed=0),FirstLast])
learn=fake_gym_learner(source,n=1000,bs=5)
# Wrap the first element of every batch of the first dataloader in a `BD`
batches=[BD(b[0]) for b in learn.dls[0]]

Could not do one pass in your dataloader, there is something wrong in it


In [8]:
# A small memory (20 slots) so that wrap-around is reached after only a few batches
experience_replay=ExperienceReplay(max_sz=20,warmup_sz=19)
# Nothing has been added yet, so the length is 0
test_len(experience_replay,0)

**What if we fill up ER?**
Let's add the batches. This process happens in place...

In [9]:
# `+` mutates `experience_replay` in place, so the result does not need to be assigned
experience_replay+batches[0]
test_eq(experience_replay.pointer,5)
test_len(experience_replay,5)

If we add again, the total size should be 10...

In [10]:
experience_replay+batches[1]
test_eq(experience_replay.pointer,10)
test_len(experience_replay,10)
# The memory should hold both batches in insertion order
test_eq(experience_replay.memory['step'],(batches[0]+batches[1])['step'])

In [11]:
experience_replay+batches[2]
test_len(experience_replay,15)
test_eq(experience_replay.pointer,15)
# Still below `max_sz`, so nothing has been overwritten yet
test_eq(experience_replay.memory['step'],(batches[0]+batches[1]+batches[2])['step'])

In [12]:
experience_replay+batches[3]
# The memory is now exactly full: length and pointer both equal `max_sz` (20)
test_len(experience_replay,20)
test_eq(experience_replay.pointer,20)
test_eq(experience_replay.memory['step'],(batches[0]+batches[1]+batches[2]+batches[3])['step'])

Let's verify that the steps are what we expect...

**What if ER is full and we add batches?** We are at the maximum memory size, so we expect the next batch added to completely
overwrite the first 5 entries...

In [13]:
# The pointer was at `max_sz`, so this batch wraps around and replaces the slots of batches[0]
experience_replay+batches[4]
test_len(experience_replay,20)
test_eq(experience_replay.pointer,5)
test_eq(experience_replay.memory['step'],(batches[4]+batches[1]+batches[2]+batches[3])['step'])

Adding further batches should keep overwriting the rest of the old entries...

In [14]:
# Chaining works because `__add__` returns the replay object itself
experience_replay+batches[5]+batches[6]+batches[7]
test_eq(experience_replay.memory['step'],(batches[4]+batches[5]+batches[6]+batches[7])['step'])
test_eq(experience_replay.pointer,20)

So we have fully overwritten the memory twice, and so far we can show that the memory overwriting works. Let's
see what happens when we add more numbered dictionaries...

In [15]:
# Only 15 of the 20 slots are rewritten here, so batches[7] survives in the last 5 slots
experience_replay+batches[8]+batches[9]+batches[10]
test_eq(experience_replay.pointer,15)
test_eq(experience_replay.memory['step'],(batches[8]+batches[9]+batches[10]+batches[7])['step'])

**What if we need to split a batch to fit at the end and beginning of the memory?** This is a possibly scary part where the dictionary needs to be split: some of it needs to be allocated to the end of the memory, and
some of it needs to be allocated at the start.

In [16]:
# 10 entries are added while the pointer is at 15 of 20, so the batch has to be split
single_large_batch=batches[11]+batches[12]
# The trailing `;` suppresses the cell output
experience_replay+single_large_batch;

In [17]:
# The first half (batches[11]) filled the tail, the second half (batches[12]) wrapped to the start
test_eq(experience_replay.pointer,5)
test_eq(experience_replay.memory['step'],(batches[12]+batches[9]+batches[10]+batches[11])['step'])

**What if we sample the experience?**

In [18]:
# Expected memory content after the previous cells
full_memory=(batches[12]+batches[9]+batches[10]+batches[11])
# One string id per entry built from its `step` and `episode_id` values
# NOTE(review): assumes this pair is unique per entry -- confirm
entry_ids=[str(o) for o in torch.hstack((full_memory['step'],full_memory['episode_id']))]
# memory_hits[i] is flipped to True once entry i has been returned by `sample`
memory_hits=[False]*len(entry_ids)

We should be able to sample enough times that we have sampled **everything**. 
So we test this by sampling, check if that sample has been seen before, and then record that.

In [19]:
# `sample` draws its indices through `np.random`, so seed it to make this cell reproducible
np.random.seed(0)
# Each sample draws 16 indices with replacement out of 20 entries, so a fixed handful of
# samples regularly misses an entry. Keep sampling until every entry has been seen,
# bounded by `max_samples` so a real bug still fails instead of looping forever.
max_samples=200
for _ in range(max_samples):
    if all(memory_hits): break
    res=experience_replay.sample()
    for o in torch.hstack((res['step'],res['episode_id'])):
        memory_hits[entry_ids.index(str(o))]=True
test_eq(all(memory_hits),True)

In [20]:
# export
class ExperienceReplayCallback(Callback):
    @delegates(ExperienceReplay)
    def __init__(self,**kwargs):
        "Holds an `ExperienceReplay` built from `kwargs` in `self.experience_replay`"
        store_attr()
        self.experience_replay=ExperienceReplay(**kwargs)

    def after_pred(self):
        "Stores the detached `learn.xb[0]` in the replay memory, then replaces `learn.xb` with a replay sample"
        detached_batch=BD(self.learn.xb[0]).mapv(to_detach)
        # `+` mutates the replay memory in place
        self.experience_replay+detached_batch
        # `sample` raises `CancelBatchException` while the memory is still warming up
        replayed=self.experience_replay.sample()
        self.learn.xb=replayed

In [21]:
from fastrl.data.gym import *
# Fresh source and a much smaller learner for exercising the callback
source=Source(
    cbs=[GymLoop(env_name='CartPole-v1',steps_delta=1,steps_count=1,seed=0),FirstLast]
)
learn=fake_gym_learner(source,n=30,bs=10)

Could not do one pass in your dataloader, there is something wrong in it


In [22]:
# NOTE: this rebinds `experience_replay` (an `ExperienceReplay` in the earlier cells) to the callback
experience_replay=ExperienceReplayCallback(bs=5,max_sz=20,warmup_sz=11)
# Attach the learner by hand so `after_pred` can be called outside of a training loop
experience_replay.learn=learn

In [23]:
for b in learn.dls[0]:
    # Set `learn.xb` by hand, since `after_pred` reads the current batch from it
    learn.xb=b
    
    try:
        experience_replay.after_pred()
        print('memory sampled')
    except CancelBatchException:
        # Raised by `sample` while fewer than `warmup_sz` entries are stored
        print('memory is not full yet!')

memory is not full yet!
memory sampled
memory sampled


In [33]:
# hide
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.export import *
    from nbdev.export2html import *
    from nbverbose.cli import *
    # Judging by the cell output, these write README.md from index.ipynb and convert the
    # notebooks (module export and html docs)
    make_readme()
    notebook2script()
    notebook2html()

converting /home/fastrl_user/fastrl/nbs/index.ipynb to README.md
Converted 00_core.ipynb.
Converted 00_nbdev_extension.ipynb.
Converted 03_callback.core.ipynb.
Converted 04_agent.ipynb.
Converted 05_data.test_async.ipynb.
Converted 05a_data.block.ipynb.
Converted 05b_data.gym.ipynb.
Converted 06a_memory.experience_replay.ipynb.
Converted 10a_agents.dqn.core.ipynb.
Converted 10b_agents.dqn.targets.ipynb.
Converted 10c_agents.dqn.double.ipynb.
Converted 10d_agents.dqn.dueling.ipynb.
Converted 10e_agents.dqn.categorical.ipynb.
Converted 11a_agents.policy_gradient.ppo.ipynb.
Converted 20_test_utils.ipynb.
Converted index.ipynb.
Converted nbdev_template.ipynb.
converting: /home/fastrl_user/fastrl/nbs/06a_memory.experience_replay.ipynb
converting: /home/fastrl_user/fastrl/nbs/10b_agents.dqn.targets.ipynb
