In [1]:
#|hide
#|eval: false
! [ -e /content ] && pip install -Uqq fastrl['dev'] pyvirtualdisplay && \
                     apt-get install -y xvfb python-opengl > /dev/null 2>&1 
# NOTE: IF YOU SEE VERSION ERRORS, IT IS SAFE TO IGNORE THEM. COLAB IS BEHIND IN SOME OF THE PACKAGE VERSIONS

In [2]:
#|hide
from fastcore.imports import in_colab
# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.showdoc import *
    from nbdev.imports import *
    if not os.environ.get("IN_TEST", None):
        assert IN_NOTEBOOK
        assert not IN_COLAB
        assert IN_IPYTHON
else:
    # Virutual display is needed for colab
    from pyvirtualdisplay import Display
    display = Display(visible=0, size=(400, 300))
    display.start()

In [3]:
#|export
# Python native modules
import os,warnings,typing
# Third party libs
from fastcore.all import *
from fastai.torch_core import *
from fastai.basics import *
import pandas as pd
import torch
import numpy as np
# Local modules

In [4]:
#|default_exp core

# Core
> Core libs for fastrl

## Primitives
> `StepTypes` are generated by environments and used by RL models for training / execution.

In [5]:
#|export
def _fmt_fld(t:typing.Tuple[str,type],namedtuple):
    default_v = ''
    if t[0] in namedtuple._field_defaults:
        default_v = f' = `{namedtuple._field_defaults[t[0]]}`'
    return ' - **%s**:`%s` '%t+default_v+getattr(namedtuple,t[0]).__doc__

def add_namedtuple_doc(
    t:typing.NamedTuple, # Primary tuple to get docs from
    doc:str, # Primary doc for the overall tuple, where the docs for individual fields will be concated.
    **fields_docs:dict # Field names with associated docs to be attached in the format: field_a='some documentation'
):
    "Add docs to `t` from `doc` along with individual doc fields `fields_docs`"
    if not hasattr(t,'__base_doc__'): t.__base_doc__ = doc
    for k,v in fields_docs.items(): getattr(t,k).__doc__ = v
    # TODO: can we add optional default fields also?
    flds = L(t.__annotations__.items()).map(_fmt_fld,namedtuple=t)
    
    s = 'Parameters:\n'+'\n'.join(flds)
    t.__doc__ = doc + '\n\n' + s    

In [24]:
#|export
class SimpleStep(typing.NamedTuple):
    state:       torch.FloatTensor=torch.FloatTensor([0])
    action:      torch.FloatTensor=torch.FloatTensor([0])
    next_state:  torch.FloatTensor=torch.FloatTensor([0])
    terminated:  torch.BoolTensor=torch.BoolTensor([1])
    truncated:   torch.BoolTensor=torch.BoolTensor([1])
    reward:      torch.FloatTensor=torch.LongTensor([0])
    total_reward:torch.FloatTensor=torch.FloatTensor([0])
    env_id:      torch.LongTensor=torch.LongTensor([0])
    proc_id:     torch.LongTensor=torch.LongTensor([0])
    step_n:      torch.LongTensor=torch.LongTensor([0])
    episode_n:   torch.LongTensor=torch.LongTensor([0])
    image:       torch.FloatTensor=torch.FloatTensor([0])
    
    def clone(self):
        return self.__class__(
            **{fld:getattr(self,fld).clone() for fld in self.__class__._fields}
        )
    
    def detach(self):
        return self.__class__(
            **{fld:getattr(self,fld).detach() for fld in self.__class__._fields}
        )
    
    def device(self,device='cpu'):
        return self.__class__(
            **{fld:getattr(self,fld).to(device=device) for fld in self.__class__._fields}
        )
    
    @classmethod
    def random(cls,seed=None,**flds):
        _flds,_annos = cls._fields,cls.__annotations__

        def _random_annos(anno):
            t = anno(1)
            if anno==torch.BoolTensor: t.random_(2) 
            else:                      t.random_(100)
            return t

        return cls(
            *(flds.get(
                f,_random_annos(_annos[f])
            ) for f in _flds)
        )

add_namedtuple_doc(
    SimpleStep,
    'Represents a single step in an environment.',
    state = 'Both the initial state of the environment and the previous state.',
    next_state = 'Both the next state, and the last state in the environment',
    terminated = """Represents an ending condition for an environment such as reaching a goal or 'living long enough' as 
                    described by the MDP.
                    Good reference is: https://github.com/openai/gym/blob/39b8661cb09f19cb8c8d2f59b57417517de89cb0/gym/core.py#L151-L155""",
    truncated = """Represents an ending condition for an environment that can be seen as an out of bounds condition either
                   literally going out of bounds, breaking rules, or exceeding the timelimit allowed by the MDP.
                   Good reference is: https://github.com/openai/gym/blob/39b8661cb09f19cb8c8d2f59b57417517de89cb0/gym/core.py#L151-L155'""",
    reward = 'The single reward for this step.',
    total_reward = 'The total accumulated reward for this episode up to this step.',
    action = 'The action that was taken to transition from `state` to `next_state`',
    env_id = 'The environment this step came from (useful for debugging)',
    proc_id = 'The process this step came from (useful for debugging)',
    step_n = 'The step number in a given episode.',
    episode_n = 'The episode this environment is currently running through.',
    image = """Intended for display and logging only. If the intention is to use images for training an
               agent, then use a env wrapper instead."""
)

In [25]:
show_doc(SimpleStep)

---

### SimpleStep

>      SimpleStep (state:torch.FloatTensor=tensor([0.]),
>                  action:torch.FloatTensor=tensor([0.]),
>                  next_state:torch.FloatTensor=tensor([0.]),
>                  terminated:torch.BoolTensor=tensor([True]),
>                  truncated:torch.BoolTensor=tensor([True]),
>                  reward:torch.FloatTensor=tensor([0]),
>                  total_reward:torch.FloatTensor=tensor([0.]),
>                  env_id:torch.LongTensor=tensor([0]),
>                  proc_id:torch.LongTensor=tensor([0]),
>                  step_n:torch.LongTensor=tensor([0]),
>                  episode_n:torch.LongTensor=tensor([0]),
>                  image:torch.FloatTensor=tensor([0.]))

Represents a single step in an environment.

Parameters:
 - **state**:`<class 'torch.FloatTensor'>`  = `tensor([0.])`Both the initial state of the environment and the previous state.
 - **action**:`<class 'torch.FloatTensor'>`  = `tensor([0.])`The action that was taken to transition from `state` to `next_state`
 - **next_state**:`<class 'torch.FloatTensor'>`  = `tensor([0.])`Both the next state, and the last state in the environment
 - **terminated**:`<class 'torch.BoolTensor'>`  = `tensor([True])`Represents an ending condition for an environment such as reaching a goal or 'living long enough' as 
                    described by the MDP.
                    Good reference is: https://github.com/openai/gym/blob/39b8661cb09f19cb8c8d2f59b57417517de89cb0/gym/core.py#L151-L155
 - **truncated**:`<class 'torch.BoolTensor'>`  = `tensor([True])`Represents an ending condition for an environment that can be seen as an out of bounds condition either
                   literally going out of bounds, breaking rules, or exceeding the timelimit allowed by the MDP.
                   Good reference is: https://github.com/openai/gym/blob/39b8661cb09f19cb8c8d2f59b57417517de89cb0/gym/core.py#L151-L155'
 - **reward**:`<class 'torch.FloatTensor'>`  = `tensor([0])`The single reward for this step.
 - **total_reward**:`<class 'torch.FloatTensor'>`  = `tensor([0.])`The total accumulated reward for this episode up to this step.
 - **env_id**:`<class 'torch.LongTensor'>`  = `tensor([0])`The environment this step came from (useful for debugging)
 - **proc_id**:`<class 'torch.LongTensor'>`  = `tensor([0])`The process this step came from (useful for debugging)
 - **step_n**:`<class 'torch.LongTensor'>`  = `tensor([0])`The step number in a given episode.
 - **episode_n**:`<class 'torch.LongTensor'>`  = `tensor([0])`The episode this environment is currently running through.
 - **image**:`<class 'torch.FloatTensor'>`  = `tensor([0.])`Intended for display and logging only. If the intention is to use images for training an
               agent, then use a env wrapper instead.

Now we can generate a couple to send their a pytorch data loader.

In [26]:
torch.manual_seed(0)
SimpleStep.random(state=torch.FloatTensor(2).fill_(0))

SimpleStep(state=tensor([0., 0.]), action=tensor([39.]), next_state=tensor([33.]), terminated=tensor([False]), truncated=tensor([True]), reward=tensor([79.]), total_reward=tensor([27.]), env_id=tensor([3]), proc_id=tensor([97]), step_n=tensor([83]), episode_n=tensor([1]), image=tensor([66.]))

In [27]:
SimpleStep.random(state=torch.FloatTensor(2).fill_(0)).clone()

SimpleStep(state=tensor([0., 0.]), action=tensor([99.]), next_state=tensor([78.]), terminated=tensor([False]), truncated=tensor([False]), reward=tensor([68.]), total_reward=tensor([94.]), env_id=tensor([33]), proc_id=tensor([26]), step_n=tensor([19]), episode_n=tensor([91]), image=tensor([54.]))

In [9]:
from torch.utils.data.dataloader_experimental import DataLoader2
import torchdata.datapipes as dp

def seed_worker(worker_id): torch.manual_seed(0)
def random_step_generator(): 
    while True: yield SimpleStep.random()
    

pipe = dp.iter.IterableWrapper(random_step_generator(),deepcopy=False)
pipe = pipe.batch(batch_size=3)

g = torch.Generator()
g.manual_seed(0)
dl = DataLoader2(pipe,num_workers=2,worker_init_fn=seed_worker)

for o in dl:
    print(o)
    break

[SimpleStep(state=tensor([[44.]]), action=tensor([[39.]]), next_state=tensor([[33.]]), terminated=tensor([[False]]), truncated=tensor([[True]]), reward=tensor([[79.]]), total_reward=tensor([[27.]]), env_id=tensor([[3]]), proc_id=tensor([[97]]), step_n=tensor([[83]]), episode_n=tensor([[1]]), image=tensor([[66.]])), SimpleStep(state=tensor([[56.]]), action=tensor([[99.]]), next_state=tensor([[78.]]), terminated=tensor([[False]]), truncated=tensor([[False]]), reward=tensor([[68.]]), total_reward=tensor([[94.]]), env_id=tensor([[33]]), proc_id=tensor([[26]]), step_n=tensor([[19]]), episode_n=tensor([[91]]), image=tensor([[54.]])), SimpleStep(state=tensor([[24.]]), action=tensor([[41.]]), next_state=tensor([[69.]]), terminated=tensor([[True]]), truncated=tensor([[True]]), reward=tensor([[80.]]), total_reward=tensor([[81.]]), env_id=tensor([[12]]), proc_id=tensor([[63]]), step_n=tensor([[60]]), episode_n=tensor([[95]]), image=tensor([[85.]]))]


In [10]:
#|export
StepType = (SimpleStep,)

## Testing
> Additional utilities for testing anything

In [11]:
#|export
def test_in(a,b):
    "`test` that `a in b`"
    test(a,b,in_, ' in ')

In [12]:
test_in('o','hello')
test_in(3,[1,2,3,4])

In [13]:
#|export
def _len_check(a,b): 
    return len(a)==(len(b) if not isinstance(b,int) else b)

def test_len(a,b,meta_info=''):
    "`test` that `len(a) == int(b) or len(a) == len(b)`"
    test(a,b,_len_check, f' len == len {meta_info}')

In [14]:
test_len([1,2,3],3)
test_len([1,2,3],[1,2,3])
test_len([1,2,3],'123')
test_fail(lambda:test_len([1,2,3],'1234'))

In [15]:
#|export
def _less_than(a,b): return a < b
def test_lt(a,b):
    "`test` that `a < b`"
    test(a,b,_less_than, ' a < b')

In [16]:
test_lt(4,5)
test_fail(lambda:test_lt(5,4))

In [2]:
#|hide
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev import nbdev_export
    nbdev_export()