In [1]:
#|hide
#|eval: false
! [ -e /content ] && pip install -Uqq fastrl['dev'] pyvirtualdisplay && \
                     apt-get install -y xvfb python-opengl > /dev/null 2>&1 
# NOTE: IF YOU SEE VERSION ERRORS, IT IS SAFE TO IGNORE THEM. COLAB IS BEHIND IN SOME OF THE PACKAGE VERSIONS

In [2]:
#|hide
from fastcore.imports import in_colab
# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.showdoc import *
    from nbdev.imports import *
    if not os.environ.get("IN_TEST", None):
        assert IN_NOTEBOOK
        assert not IN_COLAB
        assert IN_IPYTHON
else:
    # Virutual display is needed for colab
    from pyvirtualdisplay import Display
    display = Display(visible=0, size=(400, 300))
    display.start()

In [3]:
#|export
# Python native modules
import os,warnings,logging,typing
# Third party libs
from fastcore.all import *
from fastai.torch_core import *
from fastai.basics import *
from torch.utils.tensorboard import SummaryWriter
import pandas as pd
import torch
import numpy as np
# Local modules

_logger = logging.getLogger()
_li = _logger.info

In [4]:
#|default_exp core

# Core
> Core libs for fastrl

In [5]:
#|export
@call_parse
def fastrl_make_requirements(
    path:Path=None, # The path to a dir with the settings.ini, if none, cwd.
    project_file:str='settings.ini', # The file to load for reading the requirements
    out_path:Path=None # The output path (can be relative to `path`)
):
    logging.basicConfig()
    requirement_types = ['','dev_','pip_']
    path = ifnone(path, Path.cwd())/project_file

    if not path.exists(): raise OSError(f'File {path} does not exist')

    out_path = ifnone(out_path, Path('extra'))
    out_path = out_path if out_path.is_absolute() else path.parent/out_path
    out_path.mkdir(parents=True, exist_ok=True)
    _li('Outputting to path: %s',out_path)
    config = Config(path.parent,path.name)

    for req in requirement_types:
        requirements = config[req+'requirements']

        requirements = requirements.replace(' ','\n')

        Path(out_path/(req+'requirements.txt')).write_text(requirements)

In [6]:
#|export
def _fmt_fld(t:typing.Tuple[str,type],namedtuple):
    default_v = ''
    if t[0] in namedtuple._field_defaults:
        default_v = f' = `{namedtuple._field_defaults[t[0]]}`'
    return ' - **%s**:`%s` '%t+default_v+getattr(namedtuple,t[0]).__doc__

def add_namedtuple_doc(
    t:typing.NamedTuple, # Primary tuple to get docs from
    doc:str, # Primary doc for the overall tuple, where the docs for individual fields will be concated.
    **fields_docs:dict # Field names with associated docs to be attached in the format: field_a='some documentation'
):
    "Add docs to `t` from `doc` along with individual doc fields `fields_docs`"
    if not hasattr(t,'__base_doc__'): t.__base_doc__ = doc
    for k,v in fields_docs.items(): getattr(t,k).__doc__ = v
    # TODO: can we add optional default fields also?
    flds = L(t.__annotations__.items()).map(_fmt_fld,namedtuple=t)
    
    s = 'Parameters:\n'+'\n'.join(flds)
    t.__doc__ = doc + '\n\n' + s    

In [7]:
#|export
class SimpleStep(typing.NamedTuple):
    state:       torch.FloatTensor=torch.FloatTensor([0])
    action:      torch.FloatTensor=torch.FloatTensor([0])
    next_state:  torch.FloatTensor=torch.FloatTensor([0])
    terminated:  torch.BoolTensor=torch.BoolTensor([1])
    truncated:   torch.BoolTensor=torch.BoolTensor([1])
    reward:      torch.FloatTensor=torch.LongTensor([0])
    total_reward:torch.FloatTensor=torch.FloatTensor([0])
    env_id:      torch.LongTensor=torch.LongTensor([0])
    proc_id:     torch.LongTensor=torch.LongTensor([0])
    step_n:      torch.LongTensor=torch.LongTensor([0])
    episode_n:   torch.LongTensor=torch.LongTensor([0])
    image:       torch.FloatTensor=torch.FloatTensor([0])
    
    @classmethod
    def random(cls,seed=None,**flds):
        _flds,_annos = cls._fields,cls.__annotations__

        def _random_annos(anno):
            t = anno(1)
            if anno==torch.BoolTensor: t.random_(2) 
            else:                      t.random_(100)
            return t

        return cls(
            *(flds.get(
                f,_random_annos(_annos[f])
            ) for f in _flds)
        )

add_namedtuple_doc(
    SimpleStep,
    'Represents a single step in an environment.',
    state = 'Both the initial state of the environment and the previous state.',
    next_state = 'Both the next state, and the last state in the environment',
    terminated = """Represents an ending condition for an environment such as reaching a goal or 'living long enough' as 
                    described by the MDP.
                    Good reference is: https://github.com/openai/gym/blob/39b8661cb09f19cb8c8d2f59b57417517de89cb0/gym/core.py#L151-L155""",
    truncated = """Represents an ending condition for an environment that can be seen as an out of bounds condition either
                   literally going out of bounds, breaking rules, or exceeding the timelimit allowed by the MDP.
                   Good reference is: https://github.com/openai/gym/blob/39b8661cb09f19cb8c8d2f59b57417517de89cb0/gym/core.py#L151-L155'""",
    reward = 'The single reward for this step.',
    total_reward = 'The total accumulated reward for this episode up to this step.',
    action = 'The action that was taken to transition from `state` to `next_state`',
    env_id = 'The environment this step came from (useful for debugging)',
    proc_id = 'The process this step came from (useful for debugging)',
    step_n = 'The step number in a given episode.',
    episode_n = 'The episode this environment is currently running through.',
    image = """Intended for display and logging only. If the intention is to use images for training an
               agent, then use a env wrapper instead."""
)

In [8]:
show_doc(SimpleStep)

---

### SimpleStep

>      SimpleStep (state:torch.FloatTensor=tensor([0.]),
>                  action:torch.FloatTensor=tensor([0.]),
>                  next_state:torch.FloatTensor=tensor([0.]),
>                  terminated:torch.BoolTensor=tensor([True]),
>                  truncated:torch.BoolTensor=tensor([True]),
>                  reward:torch.FloatTensor=tensor([0]),
>                  total_reward:torch.FloatTensor=tensor([0.]),
>                  env_id:torch.LongTensor=tensor([0]),
>                  proc_id:torch.LongTensor=tensor([0]),
>                  step_n:torch.LongTensor=tensor([0]),
>                  episode_n:torch.LongTensor=tensor([0]),
>                  image:torch.FloatTensor=tensor([0.]))

Represents a single step in an environment.

Now we can generate a couple to send their a pytorch data loader.

In [9]:
torch.manual_seed(0)
SimpleStep.random(state=torch.FloatTensor(2).fill_(0))

SimpleStep(state=tensor([0., 0.]), action=tensor([39.]), next_state=tensor([33.]), terminated=tensor([False]), truncated=tensor([True]), reward=tensor([79.]), total_reward=tensor([27.]), env_id=tensor([3]), proc_id=tensor([97]), step_n=tensor([83]), episode_n=tensor([1]), image=tensor([66.]))

In [10]:
from torch.utils.data.dataloader_experimental import DataLoader2
import torchdata.datapipes as dp

def seed_worker(worker_id): torch.manual_seed(0)
def random_step_generator(): 
    while True: yield SimpleStep.random()
    

pipe = dp.iter.IterableWrapper(random_step_generator(),deepcopy=False)
pipe = pipe.batch(batch_size=3)

g = torch.Generator()
g.manual_seed(0)
dl = DataLoader2(pipe,num_workers=2,worker_init_fn=seed_worker)

for o in dl:
    print(o)
    break

[SimpleStep(state=tensor([[44.]]), action=tensor([[39.]]), next_state=tensor([[33.]]), terminated=tensor([[False]]), truncated=tensor([[True]]), reward=tensor([[79.]]), total_reward=tensor([[27.]]), env_id=tensor([[3]]), proc_id=tensor([[97]]), step_n=tensor([[83]]), episode_n=tensor([[1]]), image=tensor([[66.]])), SimpleStep(state=tensor([[56.]]), action=tensor([[99.]]), next_state=tensor([[78.]]), terminated=tensor([[False]]), truncated=tensor([[False]]), reward=tensor([[68.]]), total_reward=tensor([[94.]]), env_id=tensor([[33]]), proc_id=tensor([[26]]), step_n=tensor([[19]]), episode_n=tensor([[91]]), image=tensor([[54.]])), SimpleStep(state=tensor([[24.]]), action=tensor([[41.]]), next_state=tensor([[69.]]), terminated=tensor([[True]]), truncated=tensor([[True]]), reward=tensor([[80.]]), total_reward=tensor([[81.]]), env_id=tensor([[12]]), proc_id=tensor([[63]]), step_n=tensor([[60]]), episode_n=tensor([[95]]), image=tensor([[85.]]))]


In [11]:
#|export
StepType = (SimpleStep,)

## Some Important Notes

In [12]:
## Numpy to Tensor Performance

In [13]:
img=np.random.randint(0,255,size=(240, 320, 3))

In [1]:
%%timeit
#|eval: false
img=np.random.randint(0,255,size=(240, 320, 3))

UsageError: Line magic function `%%timeit` not found.


In [15]:
%%timeit
#|eval: false
deepcopy(img)

60.6 µs ± 2.35 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [16]:
%%timeit
#|eval: false
Tensor(img)

43.8 µs ± 8.45 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [17]:
%%timeit
#|eval: false
Tensor([img])

  """Entry point for launching an IPython kernel.


104 ms ± 1.87 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


You will notice that if you wrap a numpy in a list, it completely kills the performance. The solution is to
just add a batch dim to the existing array and pass it directly.

In [18]:
%%timeit
#|eval: false
Tensor(np.expand_dims(img,0))

48.7 µs ± 468 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In fact we can just test this with python lists...

In [19]:
%%timeit
#|eval: false
Tensor([[1]])

3.38 µs ± 20.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [20]:
test_arr=[[1]*270000]

In [21]:
%%timeit
#|eval: false
Tensor(test_arr)

9.49 ms ± 43.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [22]:
test_arr=np.array([[1]*270000])

In [23]:
%%timeit
#|eval: false
Tensor(test_arr)

54.5 µs ± 6.55 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


This is horrifying just how made of a performance hit this causes... So we will be avoiding python list inputs 
to Tensors for now on...

## Testing
> Additional utilities for testing anything

In [24]:
#|export
def test_in(a,b):
    "`test` that `a in b`"
    test(a,b,in_, ' in ')

In [25]:
test_in('o','hello')
test_in(3,[1,2,3,4])

In [26]:
#|export
def _len_check(a,b): 
    return len(a)==(len(b) if not isinstance(b,int) else b)

def test_len(a,b,meta_info=''):
    "`test` that `len(a) == int(b) or len(a) == len(b)`"
    test(a,b,_len_check, f' len == len {meta_info}')

In [27]:
test_len([1,2,3],3)
test_len([1,2,3],[1,2,3])
test_len([1,2,3],'123')
test_fail(lambda:test_len([1,2,3],'1234'))

In [28]:
#|export
def _less_than(a,b): return a < b
def test_lt(a,b):
    "`test` that `a < b`"
    test(a,b,_less_than, ' a < b')

In [29]:
test_lt(4,5)
test_fail(lambda:test_lt(5,4))

## Basic Visualization

In [30]:
#|export
def run_tensorboard(port=6006, # The port to run tensorboard on/connect on
                    start_tag=None, # Starting regex e.g.: experience_replay/1
                    samples_per_plugin=None, # Sampling freq such as  images=0 (keep all)
                    extra_args=None, # Any additional arguments in the `--arg value` format
                    rm_glob=None # Remove old logs via a parttern e.g.: '*' will remove all files: runs/* 
                   ):
    if rm_glob is not None:
        for p in Path('runs').glob(rm_glob): p.delete()
    import socket
    from tensorboard import notebook
    a_socket=socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    cmd=None
    if not a_socket.connect_ex(('127.0.0.1',6006)):
        notebook.display(port=port,height=1000)
    else:
        cmd=f'--logdir runs --port {port} --host=0.0.0.0'
        if samples_per_plugin is not None: cmd+=f' --samples_per_plugin {samples_per_plugin}'
        if start_tag is not None:          cmd+=f' --tag {start_tag}'
        if extra_args is not None:         cmd+=f' {extra_args}'
        notebook.start(cmd)
    return cmd

In [31]:
#|hide
SHOW_TENSOR_BOARD=False
if not os.environ.get("IN_TEST", None) and SHOW_TENSOR_BOARD:
    run_tensorboard()

In [2]:
#|hide
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev import nbdev_export
    nbdev_export()