In [1]:
#hide
#skip
%config Completer.use_jedi = False
# upgrade fastrl on colab
! [ -e /content ] && pip install -Uqq fastrl['dev'] pyvirtualdisplay && \
                     apt-get install -y xvfb python-opengl > /dev/null 2>&1 
# NOTE: IF YOU SEE VERSION ERRORS, IT IS SAFE TO IGNORE THEM. COLAB IS BEHIND IN SOME OF THE PACKAGE VERSIONS

In [2]:
# hide
from fastcore.imports import in_colab
# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.showdoc import *
    from nbdev.imports import *
    if not os.environ.get("IN_TEST", None):
#         assert IN_NOTEBOOK
        assert not IN_COLAB
        assert IN_IPYTHON
else:
    # Virtual display is needed for colab
    from pyvirtualdisplay import Display
    display = Display(visible=0, size=(400, 300))
    display.start()

In [3]:
# default_exp data.block_simple

In [4]:
# export
# Python native modules
import os
from collections import deque
from copy import deepcopy
from time import sleep
# Third party libs
from fastcore.all import *
from fastai.torch_basics import *
from fastai.data.all import *
from fastai.basics import *
from fastai.callback.all import *
from torch.utils.data import Dataset
from torch import nn
import torch
import gym
import numpy as np
# Local modules
from fastrl.core import *
from fastrl.callback.core import *
from fastrl.agent import *

In [None]:
# Test imports
import pybulletgym

# Data Block Simple
> Stripped down simpler environment execution code.

Development of this was helped by [IterableData documentation on multiple workers](https://github.com/pytorch/pytorch/blob/4949eea0ffb60dc81a0a78402fa59fdf68206718/torch/utils/data/dataset.py#L64)

This code is heavily modifed from https://github.com/Shmuma/ptan

Reference for env [semantics related to vectorized environments](https://github.com/openai/universe/blob/master/doc/env_semantics.rst)

Useful links:
- [torch multiprocessing](https://github.com/pytorch/pytorch/blob/a61a8d059efa0fb139a09e479b1a2c8dd1cf1a44/torch/utils/data/dataloader.py#L564)
- [torch worker](https://github.com/pytorch/pytorch/blob/master/torch/utils/data/_utils/worker.py)

This notebook walks through a more advanced usage of the `Loop` class.

In [5]:
# exports
def parse_events(loop:L): 
    return loop.filter(lambda s:in_('event.',s)).map(lambda s:s.replace('event.',''))

## Source 
> The base iterable used for iterating through environments.

In [6]:
# exports
_loop=L(['event.after_create','Start Setup','event.initialize','End Setup',
             'event.before_episodes',
             'Start Episodes',
                 'event.reset',
                 'event.do_action',
                 'event.do_step',
                 'event.render',
                 'event.history',
             'End Episodes',
             'event.after_episodes'
             ])

mk_class('source_events', **parse_events(_loop).map_dict(),
         doc="All possible events as attributes to get tab-completion and typo-proofing")

_all_=['source_events']

In [7]:
# exports
class Source(Loop):
    _loop=_loop
    _events=source_events
    _default='source'    
    end_event=parse_events(_loop)[-1]
    
    @delegates(Loop)
    def __init__(self,cbs=None,**kwargs):
        super().__init__(cbs=cbs,**kwargs)
        store_attr(but='cbs')
        self.idx=0
        
    def after_create(self):
        self('initialize')
        return self
        
    def __iter__(self):
        while True:
            self.idx+=1
            self('do_step')
            yield {'this':torch.full((1,5),self.idx)}

So the `Source` object does a simple loop that returns a dictionary. 
This is going to be similar to what the rest of fastrl will be expecting. 

In [8]:
source=Source()
for x,_ in zip(iter(source),range(10)): print(x)

{'this': tensor([[1, 1, 1, 1, 1]])}
{'this': tensor([[2, 2, 2, 2, 2]])}
{'this': tensor([[3, 3, 3, 3, 3]])}
{'this': tensor([[4, 4, 4, 4, 4]])}
{'this': tensor([[5, 5, 5, 5, 5]])}
{'this': tensor([[6, 6, 6, 6, 6]])}
{'this': tensor([[7, 7, 7, 7, 7]])}
{'this': tensor([[8, 8, 8, 8, 8]])}
{'this': tensor([[9, 9, 9, 9, 9]])}
{'this': tensor([[10, 10, 10, 10, 10]])}


In [9]:
Source().show_loop()

 - after_create   : []
Start Setup
   - initialize     : []
End Setup
 - before_episodes: []
Start Episodes
   - reset          : []
   - do_action      : []
   - do_step        : []
   - render         : []
   - history        : []
End Episodes
 - after_episodes : []


## Base PyTorch
This section covers the basic dataloader in pytorch with the source object. 

In [10]:
from torch.utils.data import DataLoader
source=Source()
try:                   print(list(DataLoader(iter(source),batch_size=10)))
except TypeError as e: print(e)

object of type 'generator' has no len()


Ok so the initial attempt failed. This is because we need to indicate this is an iterable dataset
that contains `items` that are each the `Source` instance. Ok so lets make this an 
iterable dataset.

In [11]:
# exports
class SourceDataset(IterableDataset):
    "Iterates through a `source` object. Allows for re-initing source connections when `num_workers>0`"
    def __init__(self,source=None): self.source=source
    def __iter__(self):             
        source=iter(self.source)
        yield from source

In [12]:
# export
class VerboseChecked(LoopCallback):
    _methods=source_events
    def __init__(self,show_worker_id=True,show_env_id=True):store_attr()
    
    def initialize(self):
        worker_id=get_worker_info()
        worker_id=worker_id.id if worker_id is not None else 0
        if self.show_worker_id: print('Worker id: ',worker_id)
    
        self.env=gym.make('HumanoidPyBulletEnv-v0')
        self.env.reset() 
    def do_step(self):
        if self.show_env_id: print('Env Id: ',id(self.env))

In [13]:
from torch.utils.data import DataLoader,IterableDataset
source=Source(cbs=VerboseChecked)
source.after_create()
dataset=SourceDataset(source)
for x in DataLoader(dataset,batch_size=3):
    print(str(x).replace(' ','').replace('\n',''))
    break

Worker id:  0
WalkerBase::__init__
Env Id:  139847813541264
Env Id:  139847813541264
Env Id:  139847813541264
{'this':tensor([[[1,1,1,1,1]],[[2,2,2,2,2]],[[3,3,3,3,3]]])}


pybullet build time: Jul 19 2021 12:32:28


In [14]:
source=Source(cbs=VerboseChecked)
source.after_create()
dataset=SourceDataset(source)
for x,i in zip(DataLoader(dataset,batch_size=2,num_workers=0),range(6)):
    print(str(x).replace(' ','').replace('\n',''))

Worker id:  0
WalkerBase::__init__
Env Id:  139847813016400
Env Id:  139847813016400
{'this':tensor([[[1,1,1,1,1]],[[2,2,2,2,2]]])}
Env Id:  139847813016400
Env Id:  139847813016400
{'this':tensor([[[3,3,3,3,3]],[[4,4,4,4,4]]])}
Env Id:  139847813016400
Env Id:  139847813016400
{'this':tensor([[[5,5,5,5,5]],[[6,6,6,6,6]]])}
Env Id:  139847813016400
Env Id:  139847813016400
{'this':tensor([[[7,7,7,7,7]],[[8,8,8,8,8]]])}
Env Id:  139847813016400
Env Id:  139847813016400
{'this':tensor([[[9,9,9,9,9]],[[10,10,10,10,10]]])}
Env Id:  139847813016400
Env Id:  139847813016400
{'this':tensor([[[11,11,11,11,11]],[[12,12,12,12,12]]])}
Env Id:  139847813016400
Env Id:  139847813016400


## Fastai Compatibility
Now lets get this working with the fastai API!

In [15]:
from fastai.data.all import *

### TfmdList Compatability

> Note: First issue we run into: It would be nice to leverage the transform API and the TfmdLists
        would be great in case we want to execute transforms on the returned items. In this case,
        we want to forgo the `SourceDataset` since we want to use the `TfmdList`s instead. 

> Note: Additional note, I wonder what the real difference is between a `TfmdList` and a `Dataset`?

In [16]:
source=Source(cbs=VerboseChecked)
# dataset=SourceDataset(source)

So from the looks of the config below, this should be fine right? We have an iterable dataset,
so we indicate that it is not indexed, and that shuffling wouldn't make sense. 

There is some strange things we need to do to actually make this work with defaults. We need to do `type_tfms` 
on the items since they need to be iterables. We then need to do `item_tfms` to tell fastai that it is supposed
to try to iterate through these are opposed to simply "pushing" them through the tfm pipeline. 

Let's see if this works!

In [17]:
try:
    dls=DataBlock(
        blocks=TransformBlock(
            dls_kwargs={'indexed':False,'shuffle':False}),
    ).dataloaders([source],n=15,bs=10,num_workers=0,verbose=True)
    for x in dls[0]:print(x)
except TypeError as e: print(e)

Collecting items from [<__main__.Source object at 0x7f30db3da970>]
Found 1 items
2 datasets of sizes 1,0
Setting up Pipeline: 
Setting up after_item: Pipeline: ToTensor
Setting up before_batch: Pipeline: 
Setting up after_batch: Pipeline: 
Could not do one pass in your dataloader, there is something wrong in it
default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class '__main__.Source'>


Oops! Seems like instead of iterating through the item, it is just passing the item into the collation mechanism. 
Let's manually make the items iterable!

In [18]:
try:
    dls=DataBlock(
        blocks=TransformBlock(
            type_tfms=[lambda o: o.after_create(),lambda o:iter(o)],
            dls_kwargs={'indexed':False,'shuffle':False}),
    ).dataloaders([source],n=15,bs=10,num_workers=0,verbose=True)
    for x in dls[0]:print(x)
except TypeError as e:print(e)

Collecting items from [<__main__.Source object at 0x7f30db3da970>]
Found 1 items
2 datasets of sizes 1,0
Setting up Pipeline: <lambda> -> <lambda>
Worker id:  0
WalkerBase::__init__
Setting up after_item: Pipeline: ToTensor
Setting up before_batch: Pipeline: 
Setting up after_batch: Pipeline: 
Could not do one pass in your dataloader, there is something wrong in it
Worker id:  0
WalkerBase::__init__
default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'generator'>


Ok,ok lets also tell it to pull items out of the generator...

In [19]:
dls=DataBlock(
    blocks=TransformBlock(
        type_tfms=[lambda o: o.after_create(),lambda o:iter(o)],
        item_tfms=lambda o:next(o),
        dls_kwargs={'indexed':False,'shuffle':False}),
).dataloaders([source],n=15,bs=10,num_workers=0,verbose=True)
for x in dls[0]:print(x)

Collecting items from [<__main__.Source object at 0x7f30db3da970>]
Found 1 items
2 datasets of sizes 1,0
Setting up Pipeline: <lambda> -> <lambda>
Worker id:  0
WalkerBase::__init__
Setting up after_item: Pipeline: <lambda> -> ToTensor
Setting up before_batch: Pipeline: 
Setting up after_batch: Pipeline: 
Could not do one pass in your dataloader, there is something wrong in it
Worker id:  0
WalkerBase::__init__
Env Id:  139847813048928
({'this': tensor([[[1, 1, 1, 1, 1]]], device='cuda:0')},)


Huh... It only loops once since there is only 1 "item" in the list. This is not desirable behavior since this is 
an iterable. The number of loops should be able to be arbitrarily defined via `n` and `bs` especially if the items
don't have a length to them. There are a couple additional worries that I have:
- We may not want to call `iter` of the items until they are loaded onto a worker/passed to a process. This is due to 
many/all iterable sources not being picklable. The `type_tfms` might do this too early. 
- Why do we need to define the `item_tfms` above in the first place? the dataloader should understand that the 
item is iterable to just pull from it?

You might wonder why we can't just pass a source directly into the `DataBlock`, however anything passed needs to have a len...

In [20]:
try:
    dls=DataBlock(
            blocks=TransformBlock(
            type_tfms=[lambda o: o.after_create(),lambda o:iter(o)],
            item_tfms=lambda o:next(o),
            dls_kwargs={'indexed':False,'shuffle':False})
    ).dataloaders(source)
except TypeError as e: print(e)

object of type 'Source' has no len()


Ok... so how do get `dls` to iterate more than just the number of items? Well, the first issue is...

In [21]:
TfmdLists.__iter__??

[0;31mSignature:[0m [0mTfmdLists[0m[0;34m.[0m[0m__iter__[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m <no docstring>
[0;31mSource:[0m        [0;32mdef[0m [0m__iter__[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m [0;32mreturn[0m [0;34m([0m[0mself[0m[0;34m[[0m[0mi[0m[0;34m][0m [0;32mfor[0m [0mi[0m [0;32min[0m [0mrange[0m[0;34m([0m[0mlen[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mFile:[0m      ~/fastai/fastai/data/core.py
[0;31mType:[0m      function


And...

In [22]:
Datasets.__iter__??

[0;31mSignature:[0m [0mDatasets[0m[0;34m.[0m[0m__iter__[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m <no docstring>
[0;31mSource:[0m        [0;32mdef[0m [0m__iter__[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m [0;32mreturn[0m [0;34m([0m[0mself[0m[0;34m[[0m[0mi[0m[0;34m][0m [0;32mfor[0m [0mi[0m [0;32min[0m [0mrange[0m[0;34m([0m[0mlen[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mFile:[0m      ~/fastai/fastai/data/core.py
[0;31mType:[0m      function


So even though we have gone through the work to indicate that these items and the overall dataset does not have a length,
we still are constrained by a `len` call. This seems to be a fundamental issue with the `TfmdLists`. Maybe we can trick it into 
thinking there are `n` items when there really is only one...

In [23]:
# exports
from itertools import cycle

class IterableTfmdLists(TfmdLists):
    def __iter__(self): return cycle(self[i] for i in range(len(self)))
    
class IterableDatasets(Datasets):
    def __iter__(self): return cycle(self[i] for i in range(len(self)))

Now that we have a custom `TfmdLists` that will cycle through all the items, we need to modfy 
`DataBlock` and `Datasets` to accept these.

In [24]:
# exports
class IterableDataBlock(DataBlock):
    tl_type = TfmdLists
    datasets_type = Datasets

    @delegates(DataBlock)
    def __init__(self,blocks=None,datasets_type=None,n_inp=None,**kwargs):
        blocks = L(self.blocks if blocks is None else blocks)
        for b in blocks:
            if getattr(b, 'tl_type', None) is not None: self.tl_type = b.tl_type
        if datasets_type is not None: self.datasets_type=datasets_type
        if (not is_listy(blocks) or len(blocks)==1) and n_inp is not None: n_inp=0
        super().__init__(blocks=blocks,n_inp=n_inp, **kwargs)
        
    def datasets(self, source, verbose=False):
        self.source = source                     ; pv(f"Collecting items from {source}", verbose)
        items = (self.get_items or noop)(source) ; pv(f"Found {len(items)} items", verbose)
        splits = (self.splitter or RandomSplitter())(items)
#         pv(f"{len(splits)} datasets of sizes {','.join([str(len(s)) for s in splits])}", verbose)
        return self.datasets_type(items, tfms=self._combine_type_tfms(), splits=splits, dl_type=self.dl_type, 
                                  n_inp=self.n_inp, verbose=verbose, tl_type=self.tl_type)

@patch
def __init__(self:Datasets, items=None, tfms=None, tls=None, n_inp=None, dl_type=None,tl_type=TfmdLists, **kwargs):
    super(Datasets,self).__init__(dl_type=dl_type)
    self.tls = L(tls if tls else [tl_type(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
    self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
    
class TransformBlock():
    "A basic wrapper that links defaults transforms for the data block API"
    def __init__(self, type_tfms=None, item_tfms=None, batch_tfms=None, dl_type=None, dls_kwargs=None, tl_type=None):
        self.type_tfms  =            L(type_tfms)
        self.item_tfms  = ToTensor + L(item_tfms)
        self.batch_tfms =            L(batch_tfms)
        self.dl_type,self.dls_kwargs = dl_type,({} if dls_kwargs is None else dls_kwargs)
        self.tl_type = tl_type

So we have modified the fastai data block API to handle custom `TfmdList`s, let's try these out!

In [25]:
# exports
class Source(Loop):
    _loop=_loop
    _events=source_events
    _default='source'    
    end_event=parse_events(_loop)[-1]
    
    @delegates(Loop)
    def __init__(self,cbs=None,**kwargs):
        super().__init__(cbs=cbs,**kwargs)
        store_attr(but='cbs')
        self.idx=0
        
    def after_create(self):
        self('initialize')
        print('init')
        return self
        
    def __iter__(self):
        while True:
            self.idx+=1
            self('do_step')
            yield {'this':torch.full((1,5),self.idx)}

In [26]:
def mutli_recreate():
    worker_id=get_worker_info()
    if worker_id is not None:
        print('reinit',worker_id.id)
        print(worker_id)
        [worker_id.dataset.d.items[i].after_create() for i in range(len(worker_id.dataset.d.items))]

In [27]:
source=Source()
block=IterableDataBlock(
    datasets_type=IterableDatasets,
    get_items=lambda o:[j.after_create() for j in o],
    splitter=lambda o:[[0]],
    blocks=TransformBlock(
        type_tfms=lambda o:iter(o),
        item_tfms=lambda o:next(o),
        tl_type=IterableTfmdLists,
        dls_kwargs={'indexed':False,'shuffle':False,'persistent_workers':True,'pin_memory':True}),
)
dls=block.dataloaders([source],n=10,bs=3,num_workers=2,verbose=True,wif=mutli_recreate)
for x in dls[0]:print(x)
for x in dls[0]:print(x)

Collecting items from [<__main__.Source object at 0x7f30daa73e80>]
init
Found 1 items
Setting up Pipeline: <lambda>
Setting up after_item: Pipeline: <lambda> -> ToTensor
Setting up before_batch: Pipeline: 
Setting up after_batch: Pipeline: 
reinit 0reinit
 1
WorkerInfo(id=1, num_workers=2, seed=5318618641177852275, dataset=<fastai.data.load._FakeLoader object at 0x7f30daa374f0>)
init
WorkerInfo(id=0, num_workers=2, seed=5318618641177852274, dataset=<fastai.data.load._FakeLoader object at 0x7f30daa374f0>)
init
[{'this': tensor([[[1, 1, 1, 1, 1]],

        [[2, 2, 2, 2, 2]],

        [[3, 3, 3, 3, 3]]], device='cuda:0')}]
[{'this': tensor([[[1, 1, 1, 1, 1]],

        [[2, 2, 2, 2, 2]],

        [[3, 3, 3, 3, 3]]], device='cuda:0')}]
[{'this': tensor([[[4, 4, 4, 4, 4]],

        [[5, 5, 5, 5, 5]],

        [[6, 6, 6, 6, 6]]], device='cuda:0')}]
[{'this': tensor([[[4, 4, 4, 4, 4]]], device='cuda:0')}]
reinit0 
WorkerInfo(id=0, num_workers=2, seed=2780370516484742933, dataset=<fastai.data.l

### Multi Epoch Iteration Issues

Great! Its iterating! But it seems to reset every iteration for some reason, i.e. It should just keep counting up...

In [28]:
dls=block.dataloaders([source],n=10,bs=3,num_workers=0,verbose=True,wif=mutli_recreate)
for x in dls[0]:print(x)
for x in dls[0]:print(x)

Collecting items from [<__main__.Source object at 0x7f30daa73e80>]
init
Found 1 items
Setting up Pipeline: <lambda>
Setting up after_item: Pipeline: <lambda> -> ToTensor
Setting up before_batch: Pipeline: 
Setting up after_batch: Pipeline: 
[{'this': tensor([[[1, 1, 1, 1, 1]],

        [[2, 2, 2, 2, 2]],

        [[3, 3, 3, 3, 3]]], device='cuda:0')}]
[{'this': tensor([[[4, 4, 4, 4, 4]],

        [[5, 5, 5, 5, 5]],

        [[6, 6, 6, 6, 6]]], device='cuda:0')}]
[{'this': tensor([[[7, 7, 7, 7, 7]],

        [[8, 8, 8, 8, 8]],

        [[9, 9, 9, 9, 9]]], device='cuda:0')}]
[{'this': tensor([[[10, 10, 10, 10, 10]]], device='cuda:0')}]
[{'this': tensor([[[11, 11, 11, 11, 11]],

        [[12, 12, 12, 12, 12]],

        [[13, 13, 13, 13, 13]]], device='cuda:0')}]
[{'this': tensor([[[14, 14, 14, 14, 14]],

        [[15, 15, 15, 15, 15]],

        [[16, 16, 16, 16, 16]]], device='cuda:0')}]
[{'this': tensor([[[17, 17, 17, 17, 17]],

        [[18, 18, 18, 18, 18]],

        [[19, 19, 19, 19, 

You will notice that the culprit seems to be related to whether the dataloader is
doing multiprocessing or not. Interestingly, it seems that persistent workers does not
work (?)

> Important: Note above
    
This is because of the line...

In [29]:
DataLoader.__iter__??

[0;31mSignature:[0m [0mDataLoader[0m[0;34m.[0m[0m__iter__[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m <no docstring>
[0;31mSource:[0m   
    [0;32mdef[0m [0m__iter__[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0mself[0m[0;34m.[0m[0mrandomize[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m        [0mself[0m[0;34m.[0m[0mbefore_iter[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m        [0mself[0m[0;34m.[0m[0m__idxs[0m[0;34m=[0m[0mself[0m[0;34m.[0m[0mget_idxs[0m[0;34m([0m[0;34m)[0m [0;31m# called in context of main process (not workers/subprocesses)[0m[0;34m[0m
[0;34m[0m        [0;32mfor[0m [0mb[0m [0;32min[0m [0m_loaders[0m[0;34m[[0m[0mself[0m[0;34m.[0m[0mfake_l[0m[0;34m.[0m[0mnum_workers[0m[0;34m==[0m[0;36m0[0m[0;34m][0m[0;34m([0m[0mself[0m[0;34m.[0m[0mfake_l[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m            

You will notice that the loader is redefined per iter. This will happen per epoch then.
Since the workers are tied to a dataloader, it may persist the worker between batches,
however it will not persist them between epochs. This is undesirable. We can try to fix this through by changing how loaders are
handled if persistant worker is set to true. You will find that this does not fix the issue due to core pytorch issues that will be illistrated later...

### Custom collater's

Finally, we need to change how collation is handled. Since we are returning batch-wise
dictionaries, we want to stack them. You will notice that the [[1,1,1,1]] get turned into [[[1,1,1,1]]].
We don't want this, so lets change it!

In [1]:
# export
from torch.utils.data._utils.collate import default_collate_err_msg_format,int_classes,string_classes,container_abcs
from torch.utils.data._utils.collate import *

def vstack_collate(batch):
    "99% similar to default_collate, however vstacks tensors thus assuming they already have a batch dim"

    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        out = None
        if torch.utils.data.get_worker_info() is not None:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum([x.numel() for x in batch])
            storage = elem.storage()._new_shared(numel)
            out = elem.new(storage)
        return torch.vstack(batch, out=out)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
            and elem_type.__name__ != 'string_':
        if elem_type.__name__ == 'ndarray' or elem_type.__name__ == 'memmap':
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(default_collate_err_msg_format.format(elem.dtype))

            return vstack_collate([torch.as_tensor(b) for b in batch])
        elif elem.shape == ():  # scalars
            return torch.as_tensor(batch)
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int_classes):
        return torch.tensor(batch)
    elif isinstance(elem, string_classes):
        return batch
    elif isinstance(elem, container_abcs.Mapping):
        return {key: vstack_collate([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
        return elem_type(*(default_collate(samples) for samples in zip(*batch)))
    elif isinstance(elem, container_abcs.Sequence):
        # check to make sure that the elements in batch have consistent size
        it = iter(batch)
        elem_size = len(next(it))
        if not all(len(elem) == elem_size for elem in it):
            raise RuntimeError('each element in list of batch should be of equal size')
        transposed = zip(*batch)
        return [vstack_collate(samples) for samples in transposed]

    raise TypeError(default_collate_err_msg_format.format(elem_type))

In [31]:
# exports
_collate_types = (ndarray, Tensor, typing.Mapping, str)

def fr_collate(t):
    "A replacement for PyTorch `default_collate` which maintains types and handles `Sequence`s"
    b = t[0]
    return (vstack_collate(t) if isinstance(b, _collate_types)
            else type(t[0])([fr_collate(s) for s in zip(*t)]) if isinstance(b, Sequence)
            else vstack_collate(t))

If the `num_worker > 0` and `persistent_workers==True`, then we need to have the loaders be re-dfined outside
of the __iter__ method.

In [32]:
# exports

from fastai.data.load import _loaders

class IterableTfmdDL(TfmdDL):
    def __init__(self, dataset,bs=64, shuffle=False, num_workers=None, verbose=False, do_setup=True,**kwargs):
        super().__init__(dataset, bs=bs, shuffle=shuffle, num_workers=num_workers,**kwargs)
        self._loader=None
        
    def create_batch(self, b): return (fr_collate,fa_convert)[self.prebatched](b)

### Pytorch persistent worker Limitation
> This is probably the worst part of the RL <-> Pytorch issue.

I thought that:

    - If self.num_workers > 0
    - And fake_l.persistent_workers==True

All I needed to do was make sure fastai doesn't destroy dls if these above cases were
true. The below code that would be added to `IterableTfmdDL` would have fixed this.

```python
    def sample(self): 
        return (b for i,b in enumerate(self.__idxs) if i//(self.bs or 1)%self.num_workers==self.offs)

        
    def __iter__(self):
        self.randomize()
        self.before_iter()
        self.__idxs=self.get_idxs() # called in context of main process (not workers/subprocesses)
        if self.fake_l.num_workers>0 and self.fake_l.persistent_workers and self._loader is None:
            self._loader=_loaders[self.fake_l.num_workers==0](self.fake_l)
        
        for b in ifnone(self._loader,_loaders[self.fake_l.num_workers==0](self.fake_l)):
#         for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
            if self.device is not None: b = to_device(b, self.device)
            yield self.after_batch(b)
        self.after_iter()
        if hasattr(self, 'it'): del(self.it)
```

This doesn't fix the issue. 

Per https://pytorch.org/docs/stable/data.html:
        
    persistent_workers (bool, optional) â€“ If True, the data loader will not shutdown the 
                                          worker processes after a dataset has been consumed once. 
                                          This allows to maintain the workers Dataset instances 
                                          alive. (default: False)

Iterable datasets don't get "consumed" bsaed on if an environment is done. They can consumed
if the dataset reaches an arbitrary length. This means that an agent might be in the middle of 
executing an episode, and the dataset will end, and next epoch will start from scratch.

The is really bad **unless** the user does not use multiprocessing at all, or
we make "n" really really big, so that we can get a few full episodes completed.

Overall, it would be better to get workers to persist between epochs.

### Learner compat

Let's try to plug this into a `Learner` then...

In [33]:
from torch.nn import Linear

class FakeModel(Module):
    def __init__(self):
        self.fc=Linear(5,5)
        
    def forward(self,x):
        print(x)
        return x['this']

In [34]:
IterableDataBlock??

[0;31mInit signature:[0m
[0mIterableDataBlock[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mblocks[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdatasets_type[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn_inp[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdl_type[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mgetters[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mitem_tfms[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbatch_tfms[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mget_items[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msplitter[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mget_y[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mget_x[0m[0;34m=[0m[0;32mNone[0

In [35]:
TransformBlock??

[0;31mInit signature:[0m
[0mTransformBlock[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mtype_tfms[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mitem_tfms[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbatch_tfms[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdl_type[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdls_kwargs[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtl_type[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m      A basic wrapper that links defaults transforms for the data block API
[0;31mType:[0m           type
[0;31mSubclasses:[0m     


In [36]:
# exports
def get_sources(_,ls): return [o.after_create() for o in ls]

class SourceDataBlock(IterableDataBlock):
    datasets_type=IterableDatasets
    get_items=get_sources
    blocks=TransformBlock(
        type_tfms=[lambda o:iter(o)],
        item_tfms=lambda o:next(o),
        tl_type=IterableTfmdLists,
        dl_type=IterableTfmdDL,
        dls_kwargs={'indexed':False,'shuffle':False,'persistent_workers':True})

In addition to above, we also don't want to run evaluation epochs since there isn't a simple
way to split envirnoments between those 2 phases. Maybe in the near future we can have this...

In [37]:
# export
@patch
def _do_epoch_validate(self:Learner,*args,**kwargs): return 0

Another augmentation we need to do is allow metrics to be run during training time...

In [38]:
# export
@patch
def after_create(self:Callback): 
    for cb in self.learn.cbs: 
        if hasattr(cb,'train_metrics'): cb.train_metrics=True


Since a lot of the learners will only have the `xb` field populated, we need look at
the len of xb also

In [39]:
# export
@patch
def after_batch(self:Recorder):
    "Update all metrics and records lr and smooth loss in training"
    if len(self.yb) == 0 and len(self.xb) == 0: return
    mets = self._train_mets if self.training else self._valid_mets
    for met in mets: met.accumulate(self.learn)
    if not self.training: return
    self.lrs.append(self.opt.hypers[-1]['lr'])
    self.losses.append(self.smooth_loss.value)
    self.learn.smooth_loss = self.smooth_loss.value


In [40]:
source=Source(cbs=VerboseChecked)
block=SourceDataBlock()
dls=block.dataloaders([source],n=5,bs=2,num_workers=0,verbose=True)

Collecting items from [<__main__.Source object at 0x7f30daa2aa90>]
Worker id:  0
WalkerBase::__init__
init
Found 1 items
Setting up Pipeline: SourceDataBlock.<lambda>
Could not do one pass in your dataloader, there is something wrong in it


In [41]:
learn=Learner(dls,FakeModel(),loss_func=lambda o: 0.5)

In [42]:
learn.fit(4)

epoch,train_loss,valid_loss,time
0,0.0,00:00,
1,0.0,00:00,
2,0.0,00:00,
3,0.0,00:00,


Env Id:  139847803273664
Env Id:  139847803273664
{'this': tensor([[1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2]], device='cuda:0')}
Env Id:  139847803273664
Env Id:  139847803273664
{'this': tensor([[3, 3, 3, 3, 3],
        [4, 4, 4, 4, 4]], device='cuda:0')}
Env Id:  139847803273664
{'this': tensor([[5, 5, 5, 5, 5]], device='cuda:0')}
Env Id:  139847803273664
Env Id:  139847803273664
{'this': tensor([[6, 6, 6, 6, 6],
        [7, 7, 7, 7, 7]], device='cuda:0')}
Env Id:  139847803273664
Env Id:  139847803273664
{'this': tensor([[8, 8, 8, 8, 8],
        [9, 9, 9, 9, 9]], device='cuda:0')}
Env Id:  139847803273664
{'this': tensor([[10, 10, 10, 10, 10]], device='cuda:0')}
Env Id:  139847803273664
Env Id:  139847803273664
{'this': tensor([[11, 11, 11, 11, 11],
        [12, 12, 12, 12, 12]], device='cuda:0')}
Env Id:  139847803273664
Env Id:  139847803273664
{'this': tensor([[13, 13, 13, 13, 13],
        [14, 14, 14, 14, 14]], device='cuda:0')}
Env Id:  139847803273664
{'this': tensor([[15, 15, 

This looks good, however because of the `persistent worker's` issue, if we have `num_workers>>0`...

In [43]:
source=Source(cbs=VerboseChecked)
dls=block.dataloaders([source],n=5,bs=2,num_workers=2,verbose=True)
learn=Learner(dls,FakeModel(),loss_func=lambda o: 0.5)
learn.fit(4)

Collecting items from [<__main__.Source object at 0x7f30daa16ca0>]
Worker id:  0
WalkerBase::__init__
init
Found 1 items
Setting up Pipeline: SourceDataBlock.<lambda>
Could not do one pass in your dataloader, there is something wrong in it


epoch,train_loss,valid_loss,time
0,0.0,00:00,
1,0.0,00:00,
2,0.0,00:00,
3,0.0,00:00,


Env Id:  139848597874624
Env Id:  139848597874624Env Id: 
 139848597874624
Env Id: Env Id:   139848597874624
139848597874624
{'this': tensor([[1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2]], device='cuda:0')}
{'this': tensor([[1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2]], device='cuda:0')}
{'this': tensor([[3, 3, 3, 3, 3]], device='cuda:0')}
Env Id:  139848597874624
Env Id:  Env Id: 139848597874624 139848597874624
Env Id:  
139848597874624
Env Id:  139848597874624
{'this': tensor([[1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2]], device='cuda:0')}
{'this': tensor([[1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2]], device='cuda:0')}
{'this': tensor([[3, 3, 3, 3, 3]], device='cuda:0')}
Env Id:  139848597874624
Env Id:  Env Id: 139848597874624 139848597874624

Env Id:  Env Id: 139848597874624 
139848597874624
{'this': tensor([[1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2]], device='cuda:0')}
{'this': tensor([[1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2]], device='cuda:0')}
{'this': tensor([[3, 3, 3, 3, 3]], device='cuda:0')

Yeah... so you will notice that the `source` object seems to be reset every epoch... This is because the workers
are being re-created between epochs.

## Conclusion

Getting fastai API to work with RL environments isn't too bad. I am looking forward to v3.
The ultamate scary blocker is that fact persistent workers are not persistent for the dl's 
life cycle, but instead are persistent for a dl's iteration.

For now, the most efficient way to run an agent is with `num_workers==0`. I would be interested
in fixing this however.

## Export

In [3]:
pip show fastai

Name: fastai
Version: 2.4.2
Summary: fastai simplifies training fast and accurate neural nets using modern best practices
Home-page: https://github.com/fastai/fastai/tree/master/
Author: Jeremy Howard, Sylvain Gugger, and contributors
Author-email: info@fast.ai
License: Apache Software License 2.0
Location: /home/fastrl_user/fastai
Requires: pip, packaging, fastcore, torchvision, matplotlib, pandas, requests, pyyaml, fastprogress, pillow, scikit-learn, scipy, spacy, torch
Required-by: fastrl
Note: you may need to restart the kernel to use updated packages.


In [2]:
# hide
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.export import *
    from nbdev.export2html import *
    from nbdev.cli import make_readme
    make_readme()
    notebook2script()
    notebook2html()

converting /home/fastrl_user/fastrl/nbs/index.ipynb to README.md
Converted 00_core.ipynb.
Converted 00_nbdev_extension.ipynb.
Converted 03_callback.core.ipynb.
Converted 04_agent.ipynb.
Converted 05_data.test_async.ipynb.
Converted 05a_data.block.ipynb.
Converted 05b_data.block_simple.ipynb.
Converted 05c_data.gym.ipynb.
Converted 10a_agents.dqn.core.ipynb.
Converted 10b_agents.dqn.targets.ipynb.
Converted 10c_agents.dqn.double.ipynb.
Converted 10d_agents.dqn.dueling.ipynb.
Converted 10e_agents.dqn.categorical.ipynb.
Converted 11a_agents.policy_gradient.ppo.ipynb.
Converted 20_test_utils.ipynb.
Converted index.ipynb.
Converted nbdev_template.ipynb.
converting: /home/fastrl_user/fastrl/nbs/05b_data.block_simple.ipynb
