In [1]:
#hide
#skip
%config Completer.use_jedi = False
%config IPCompleter.greedy=True
# upgrade fastrl on colab
! [ -e /content ] && pip install -Uqq fastrl['dev'] pyvirtualdisplay && \
                     apt-get install -y xvfb python-opengl > /dev/null 2>&1 
# NOTE: IF YOU SEE VERSION ERRORS, IT IS SAFE TO IGNORE THEM. COLAB IS BEHIND IN SOME OF THE PACKAGE VERSIONS

In [2]:
# hide
# `os` is imported explicitly: the IN_TEST check below would otherwise only work
# because `from nbdev.imports import *` happens to bring an `os` name into scope.
import os

from fastcore.imports import in_colab
# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.showdoc import *
    from nbdev.imports import *
    # Environment sanity checks; skipped whenever the IN_TEST environment variable is set.
    if not os.environ.get("IN_TEST", None):
        assert IN_NOTEBOOK
        assert not IN_COLAB
        assert IN_IPYTHON
else:
    # Virtual display is needed for colab
    from pyvirtualdisplay import Display
    # NOTE(review): this rebinds the notebook-level name `display` to the virtual
    # display object. The name is kept for backward compatibility.
    display = Display(visible=0, size=(400, 300))
    display.start()

In [3]:
# default_exp fastai.learner

In [4]:
# export
# Python native modules
import os
from typing import *
# Third party libs
from fastcore.all import *
from torch.utils.data.dataloader_experimental import DataLoader2
import torchdata.datapipes as dp
from torch.nn import *
from torch.optim import *
from fastai.torch_basics import *
from fastai.torch_core import *

# Local modules
from fastrl.fastai.data.block import *
from fastrl.fastai.data.pipes.core import *

# Learner
> A revised fastai learner that uses DataPipe shimming

In [5]:
# export
class XYSplit(dp.iter.IterDataPipe):
    "Turn every item of `source_datapipe` into an `(x, item)` pair, where `x` is `item[x_fld]`."
    def __init__(self,source_datapipe,learn,x_fld):
        # `learn` is stored but not read during iteration.
        self.source_datapipe,self.learn,self.x_fld = source_datapipe,learn,x_fld

    def __iter__(self):
        for item in self.source_datapipe:
            # The whole item is forwarded as the second element, not only the
            # non-`x_fld` entries.
            x = item[self.x_fld]
            yield x,item

In [6]:
# export
class ModelPredict(dp.iter.IterDataPipe):
    "Run `learn.model` on the first element of each `(xb, yb)` pair and yield the predictions."
    def __init__(self,source_datapipe,learn):
        self.source_datapipe,self.learn = source_datapipe,learn

    def __iter__(self):
        for inputs,targets in self.source_datapipe:
            learner = self.learn
            # Record the current batch and its predictions on the learner so
            # later stages can read them back from there.
            learner.xb,learner.yb = inputs,targets
            learner.preds = learner.model(inputs)
            yield learner.preds

In [7]:
# export
class LossCalc(dp.iter.IterDataPipe):
    "For each upstream item, apply `learn.loss_func` to `learn.preds` and `learn.yb[y_target]`, yielding the loss."
    def __init__(self,source_datapipe,learn,y_target):
        self.source_datapipe,self.learn,self.y_target = source_datapipe,learn,y_target

    def __iter__(self):
        # The upstream item is only used to advance the pipeline; the inputs to
        # the loss are read from the learner's `preds` and `yb` attributes.
        for _ in self.source_datapipe:
            learner = self.learn
            loss_fn,preds = learner.loss_func,learner.preds
            # NOTE(review): the target is star-unpacked into separate positional
            # arguments — confirm this matches the shape the loader produces.
            targets = learner.yb[self.y_target]
            self.loss_grad = loss_fn(preds, *targets)
            # `loss` is a clone, so `loss_grad` stays available as its own tensor.
            self.loss = self.loss_grad.clone()
            yield self.loss

In [None]:
       # self._with_events(self._backward, 'backward', CancelBackwardException)
       #  self._with_events(self._step, 'step', CancelStepException)
       #  self.opt.zero_grad()

In [8]:
# export
def default_train_loop(
    dls:List[DataLoader2],
    cbs:Optional[List[Callback]]=None,
):
    """Wrap every loader in `dls` in an `IterableWrapper` and attach `cbs`.

    Args:
        dls: loaders to wrap; each one is passed to `dp.iter.IterableWrapper`.
        cbs: callbacks handed to `add_cbs` on the resulting collection.

    Returns:
        Whatever `add_cbs` returns for the collection of wrapped loaders.
    """
    # NOTE(review): `add_cbs` is called on the `L` collection itself, not on each
    # wrapped pipe — presumably provided by a project patch; confirm.
    train_valid = L(dls).map(dp.iter.IterableWrapper).add_cbs(cbs)

    # Previously returned the undefined name `train_vals`, which raised NameError.
    return train_valid

In [9]:
# export
def only_train_loop(
    dls:List[DataLoader2],
    cbs:Optional[List[Callback]]=None,
):
    """Wrap `dls` as a single `IterableWrapper` pipe and attach `cbs` to it.

    `deepcopy=False` is passed to the wrapper. The value returned is whatever
    `add_cbs` returns for the wrapped pipe.
    """
    wrapped = dp.iter.IterableWrapper(dls,deepcopy=False)
    return wrapped.add_cbs(cbs)

In [10]:
# export
class Learner(dp.iter.IterDataPipe):
    "Hold `model`, `dls`, `opt` and `loss_func`, and drive data through pipes supplied by callbacks."
    def __init__(self,model,dls,opt,loss_func,cbs,train_loop=None):
        store_attr('model,dls,opt,loss_func')
        # Start empty: `add_cbs` instantiates, binds and registers each callback.
        self.cbs = L()
        self.add_cbs(cbs)
        self.train_loop = ifnone(train_loop,default_train_loop)

    def fit(self,epochs):
        "Feed the first loader in `dls` through `only_train_loop` and print every result."
        # NOTE(review): neither `epochs` nor `self.train_loop` is read here;
        # confirm whether that is intentional.
        self.it = iter(self.dls[0])
        train_pipe = only_train_loop(L(self.it),self.cbs) # Do not pass tuple, otherwise traverse will try to read the dl datapipes
        for res in train_pipe:
            print(res)

    def add_cbs(self, cbs):
        "Register every callback in `cbs`; returns `self` for chaining."
        L(cbs).map(self.add_cb)
        return self

    def remove_cbs(self, cbs):
        "Unregister every callback in `cbs`; returns `self` for chaining."
        L(cbs).map(self.remove_cb)
        return self

    def add_cb(self, cb):
        "Register one callback (a class is instantiated first) and expose it as `self.<cb.name>`."
        if isinstance(cb, type): cb = cb()
        cb.learn = self
        cb.init_pipes()
        setattr(self, cb.name, cb)
        self.cbs.append(cb)
        return self

    def remove_cb(self, cb):
        "Unregister one callback; given a class, unregister every registered instance of it."
        # `remove_cbs` maps over this method, which was previously undefined and
        # made any removal raise AttributeError.
        if isinstance(cb, type):
            # Snapshot the matches first so `self.cbs` is not mutated while scanned.
            for inst in [o for o in self.cbs if isinstance(o, cb)]: self.remove_cb(inst)
            return self
        cb.learn = None
        # Only drop the attribute if it still points at this callback.
        if getattr(self, cb.name, None) is cb: delattr(self, cb.name)
        if cb in self.cbs: self.cbs.remove(cb)
        return self

In [11]:
# export
class DQN(Module):
    "Fully connected net: `Linear(state_sz,hidden)` -> `ReLU` -> `Linear(hidden,action_sz)`."
    def __init__(self,state_sz:int,action_sz:int,hidden=512):
        # NOTE(review): no explicit `super().__init__()` call — presumably relies on
        # the `Module` in scope handling base initialisation; confirm.
        stack = [Linear(state_sz,hidden), ReLU(), Linear(hidden,action_sz)]
        self.layers = Sequential(*stack)

    def forward(self,x):
        "Pass `x` through the layer stack."
        return self.layers(x)

In [12]:
class ReinforcementLearningSimpleCallback(Callback):
    "Callback contributing three pipe factories: `XYSplit` -> `ModelPredict` -> `LossCalc`."
    call_on=L(dp.iter.IterableWrapper)

    def init_pipes(self):
        "Build `self.pipes`: partials bound to `self.learn`, each still awaiting its source pipe."
        split_stage = partial(XYSplit,learn=self.learn,x_fld='state')
        predict_stage = partial(ModelPredict,learn=self.learn)
        loss_stage = partial(LossCalc,learn=self.learn,y_target='reward')
        self.pipes=L(split_stage,predict_stage,loss_stage)


In [13]:
# Data block built from `GymTransformBlock`, using `simple_iter_loader_loop` as its loader loop.
block = DataBlock(
    blocks = GymTransformBlock,
    loader_loop=simple_iter_loader_loop
)
# Both calls receive four 'CartPole-v1' entries and bs=4, but different `n` (10 vs 20).
# NOTE(review): confirm the differing `n` is intentional.
ds = block.datapipes(['CartPole-v1']*4,n=10,bs=4)
dls = block.dataloaders(['CartPole-v1']*4,n=20,n_workers=0,bs=4)

In [14]:
# Print only the first item produced by the first datapipe, then stop.
for o in ds[0]:
    print(o)
    break

{'state': TensorBatch([[ 0.0137, -0.0230, -0.0459, -0.0483],
             [ 0.0137, -0.0230, -0.0459, -0.0483],
             [ 0.0132, -0.2175, -0.0469,  0.2295],
             [ 0.0137, -0.0230, -0.0459, -0.0483]]), 'next_state': TensorBatch([[ 0.0132, -0.2175, -0.0469,  0.2295],
             [ 0.0132, -0.2175, -0.0469,  0.2295],
             [ 0.0089, -0.4119, -0.0423,  0.5070],
             [ 0.0132, -0.2175, -0.0469,  0.2295]]), 'done': TensorBatch([[False],
             [False],
             [False],
             [False]]), 'reward': TensorBatch([[1.],
             [1.],
             [1.],
             [1.]]), 'action': TensorBatch([[0],
             [0],
             [0],
             [0]]), 'env_id': TensorBatch([[140030438722896],
             [140030438616720],
             [140030438722896],
             [140030438616720]])}


In [15]:
# state_sz=4 (the sample 'state' rows printed above have 4 values), action_sz=2.
model = DQN(4, 2)

In [16]:
# The callback is passed as a class; `Learner.add_cb` instantiates it.
# NOTE(review): `Learner.fit` calls `only_train_loop` directly and does not read `train_loop`.
learn = Learner(model, dls, opt=AdamW(model.parameters()), loss_func=MSELoss(),train_loop=only_train_loop,
               cbs=ReinforcementLearningSimpleCallback)

In [17]:
# Prints one loss per result from the training pipe.
# NOTE(review): `fit` does not read its `epochs` argument, so the 4 has no effect on its own.
learn.fit(4)

TensorBatch(0.9120, grad_fn=<AliasBackward0>)
TensorBatch(0.8932, grad_fn=<AliasBackward0>)
TensorBatch(0.9198, grad_fn=<AliasBackward0>)
TensorBatch(0.9282, grad_fn=<AliasBackward0>)
TensorBatch(0.9267, grad_fn=<AliasBackward0>)
TensorBatch(0.9327, grad_fn=<AliasBackward0>)
TensorBatch(0.9373, grad_fn=<AliasBackward0>)
TensorBatch(0.9454, grad_fn=<AliasBackward0>)
TensorBatch(0.9558, grad_fn=<AliasBackward0>)
TensorBatch(0.9631, grad_fn=<AliasBackward0>)
TensorBatch(0.9745, grad_fn=<AliasBackward0>)
TensorBatch(0.9935, grad_fn=<AliasBackward0>)
TensorBatch(1.0081, grad_fn=<AliasBackward0>)
TensorBatch(1.0330, grad_fn=<AliasBackward0>)
TensorBatch(1.0678, grad_fn=<AliasBackward0>)
TensorBatch(1.0391, grad_fn=<AliasBackward0>)
TensorBatch(0.9057, grad_fn=<AliasBackward0>)
TensorBatch(0.9020, grad_fn=<AliasBackward0>)
TensorBatch(0.9198, grad_fn=<AliasBackward0>)
TensorBatch(0.9277, grad_fn=<AliasBackward0>)
TensorBatch(0.9287, grad_fn=<AliasBackward0>)
TensorBatch(0.9327, grad_fn=<Alias

  ret = func(*args, **kwargs)
  ret = func(*args, **kwargs)


In [19]:
# hide
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.export import *
    from nbdev.export2html import *
    from nbdev.cli import *
    # Outside colab only: regenerate the README, then export the notebooks to modules.
    make_readme()
    notebook2script(silent=True)

converting /home/fastrl_user/fastrl/nbs/index.ipynb to README.md
