In [1]:
#hide
#skip
%config Completer.use_jedi = False
%config IPCompleter.greedy=True
# upgrade fastrl on colab
! [ -e /content ] && pip install -Uqq fastrl['dev'] pyvirtualdisplay && \
                     apt-get install -y xvfb python-opengl > /dev/null 2>&1 
# NOTE: IF YOU SEE VERSION ERRORS, IT IS SAFE TO IGNORE THEM. COLAB IS BEHIND IN SOME OF THE PACKAGE VERSIONS

In [2]:
# hide
from fastcore.imports import in_colab
# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.showdoc import *
    from nbdev.imports import *
    if not os.environ.get("IN_TEST", None):
        assert IN_NOTEBOOK
        assert not IN_COLAB
        assert IN_IPYTHON
else:
    # Virutual display is needed for colab
    from pyvirtualdisplay import Display
    display = Display(visible=0, size=(400, 300))
    display.start()

In [3]:
# default_exp envs.core

In [4]:
# export
# Python native modules
import os
# Third party libs
from fastcore.all import *
import torchdata.datapipes as dp
import typing
from fastai.torch_basics import *
from fastai.torch_core import *
# Local modules
from fastrl.fastai.data.pipes.core import *
from fastrl.fastai.data.load import *
from fastrl.fastai.data.block import *

# Envs Core
> Core Fastrl API for working with envs and plugging them into the larger ecosystem. These include:
`Step`, generic DataPipes for env running,

In [5]:
# export
class Flattener(dp.iter.IterDataPipe):
    "Takes nested lists and unwraps them yielding 1 element at a time."
    def __init__(self, source_datapipe) -> None:
        self.source_datapipe = source_datapipe
    
    def __iter__(self):
        for list_like_element in self.source_datapipe:
            if not is_listy(list_like_element):
                raise Exception(f'Expected listy object got {type(list_like_element)}\n{list_like_element}')
            yield from (o for o in list_like_element)

Below we have `elements` that is a list of 3 lists. We want to flatten these into a single ordered list.

In [6]:
elements = [list(range(10)),list(range(10,15)),list(range(15,20))]
pipe = dp.iter.IterableWrapper(elements)
pipe = Flattener(pipe)
test_eq(
    list(pipe),
    list(range(10))+list(range(10,15))+list(range(15,20))
)

In [179]:
# export
def _fmt_fld(t:typing.Tuple[str,type],namedtuple):
    default_v = ''
    if t[0] in namedtuple._field_defaults:
        default_v = f' = `{namedtuple._field_defaults[t[0]]}`'
    return ' - **%s**:`%s` '%t+default_v+getattr(namedtuple,t[0]).__doc__

def add_namedtuple_doc(
    t:typing.NamedTuple, # Primary tuple to get docs from
    doc:str, # Primary doc for the overall tuple, where the docs for individual fields will be concated.
    **fields_docs:dict # Field names with associated docs to be attached in the format: field_a='some documentation'
):
    "Add docs to `t` from `doc` along with individual doc fields `fields_docs`"
    if not hasattr(t,'__base_doc__'): t.__base_doc__ = doc
    for k,v in fields_docs.items(): getattr(t,k).__doc__ = v
    # TODO: can we add optional default fields also?
    flds = L(t.__annotations__.items()).map(_fmt_fld,namedtuple=t)
    
    s = 'Parameters:\n'+'\n'.join(flds)
    t.__doc__ = doc + '\n\n' + s    

In [260]:
# export
class SimpleStep(typing.NamedTuple):
    state:       torch.FloatTensor
    next_state:  torch.FloatTensor
    action:      torch.FloatTensor
    done:        torch.BoolTensor=torch.BoolTensor([1])
    reward:      torch.FloatTensor=torch.LongTensor([0])
    total_reward:torch.FloatTensor=torch.FloatTensor([0])
    env_id:      torch.LongTensor=torch.LongTensor([0])
    proc_id:     torch.LongTensor=torch.LongTensor([0])
    step_n:      torch.LongTensor=torch.LongTensor([0])
    episode_n:   torch.LongTensor=torch.LongTensor([0])
    
    @classmethod
    def random(cls,seed=None,**flds):
        _flds,_annos = cls._fields,cls.__annotations__

        def _random_annos(anno):
            t = anno(1)
            if anno==torch.BoolTensor: t.random_(2) 
            else:                      t.random_(100)
            return t

        return cls(
            *(flds.get(
                f,_random_annos(_annos[f])
            ) for f in _flds)
        )

add_namedtuple_doc(
    SimpleStep,
    'Represents a single step in an environment.',
    state = 'Both the initial state of the environment and the previous state.',
    next_state = 'Both the next state, and the last state in the environment',
    done = 'Whether this step represents the end of an episode.',
    reward = 'The single reward for this step.',
    total_reward = 'The total accumulated reward for this episode up to this step.',
    action = 'The action that was taken to transition from `state` to `next_state`',
    env_id = 'The environment this step came from (useful for debugging)',
    proc_id = 'The process this step came from (useful for debugging)',
    step_n = 'The step number in a given episode.',
    episode_n = 'The episode this environment is currently running through.'
)

In [262]:
show_doc(SimpleStep)

<h2 id="SimpleStep" class="doc_header"><code>class</code> <code>SimpleStep</code><a href="" class="source_link" style="float:right">[source]</a></h2>

> <code>SimpleStep</code>(**`state`**:`FloatTensor`, **`next_state`**:`FloatTensor`, **`done`**:`BoolTensor`, **`action`**:`FloatTensor`, **`reward`**:`FloatTensor`=*`tensor([0])`*, **`total_reward`**:`FloatTensor`=*`tensor([0.])`*, **`env_id`**:`LongTensor`=*`tensor([0])`*, **`proc_id`**:`LongTensor`=*`tensor([0])`*, **`step_n`**:`LongTensor`=*`tensor([0])`*, **`episode_n`**:`LongTensor`=*`tensor([0])`*) :: `tuple`

Represents a single step in an environment.

Parameters:
 - **state**:`<class 'torch.FloatTensor'>` Both the initial state of the environment and the previous state.
 - **next_state**:`<class 'torch.FloatTensor'>` Both the next state, and the last state in the environment
 - **done**:`<class 'torch.BoolTensor'>` Whether this step represents the end of an episode.
 - **action**:`<class 'torch.FloatTensor'>` The action that was taken to transition from `state` to `next_state`
 - **reward**:`<class 'torch.FloatTensor'>`  = `tensor([0])`The single reward for this step.
 - **total_reward**:`<class 'torch.FloatTensor'>`  = `tensor([0.])`The total accumulated reward for this episode up to this step.
 - **env_id**:`<class 'torch.LongTensor'>`  = `tensor([0])`The environment this step came from (useful for debugging)
 - **proc_id**:`<class 'torch.LongTensor'>`  = `tensor([0])`The process this step came from (useful for debugging)
 - **step_n**:`<class 'torch.LongTensor'>`  = `tensor([0])`The step number in a given episode.
 - **episode_n**:`<class 'torch.LongTensor'>`  = `tensor([0])`The episode this environment is currently running through.

Now we can generate a couple to send their a pytorch data loader.

In [316]:
torch.manual_seed(0)
SimpleStep.random(state=torch.FloatTensor(2).fill_(0))

SimpleStep(state=tensor([0., 0.]), next_state=tensor([39.]), done=tensor([True]), action=tensor([60.]), reward=tensor([63.]), total_reward=tensor([79.]), env_id=tensor([27]), proc_id=tensor([3]), step_n=tensor([97]), episode_n=tensor([83]))

In [317]:
SimpleStep.random()

SimpleStep(state=tensor([1.]), next_state=tensor([66.]), done=tensor([False]), action=tensor([99.]), reward=tensor([78.]), total_reward=tensor([76.]), env_id=tensor([56]), proc_id=tensor([68]), step_n=tensor([94]), episode_n=tensor([33]))

In [333]:
from torch.utils.data.dataloader_experimental import DataLoader2

def seed_worker(worker_id): torch.manual_seed(0)
def random_step_generator(): 
    while True: yield SimpleStep.random()
    

pipe = dp.iter.IterableWrapper(random_step_generator(),deepcopy=False)
pipe = pipe.batch(batch_size=3)

g = torch.Generator()
g.manual_seed(0)
dl = DataLoader2(pipe,num_workers=2,worker_init_fn=seed_worker)

for o in dl:
    print(o)
    break

[SimpleStep(state=tensor([[44.]]), next_state=tensor([[39.]]), done=tensor([[True]]), action=tensor([[60.]]), reward=tensor([[63.]]), total_reward=tensor([[79.]]), env_id=tensor([[27]]), proc_id=tensor([[3]]), step_n=tensor([[97]]), episode_n=tensor([[83]])), SimpleStep(state=tensor([[1.]]), next_state=tensor([[66.]]), done=tensor([[False]]), action=tensor([[99.]]), reward=tensor([[78.]]), total_reward=tensor([[76.]]), env_id=tensor([[56]]), proc_id=tensor([[68]]), step_n=tensor([[94]]), episode_n=tensor([[33]])), SimpleStep(state=tensor([[26.]]), next_state=tensor([[19.]]), done=tensor([[True]]), action=tensor([[54.]]), reward=tensor([[24.]]), total_reward=tensor([[41.]]), env_id=tensor([[69]]), proc_id=tensor([[69]]), step_n=tensor([[49]]), episode_n=tensor([[80]]))]


In [335]:
# hide
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.export import *
    from nbdev.export2html import *
    from nbdev.cli import *
    make_readme()
    notebook2script(silent=True)

converting /home/fastrl_user/fastrl/nbs/index.ipynb to README.md
