In [1]:
#|hide
#|eval: false
! [ -e /content ] && pip install -Uqq "fastrl[dev]" pyvirtualdisplay && \
                     apt-get install -y xvfb python-opengl > /dev/null 2>&1 
# The install only runs when `/content` exists (i.e. on Colab). The requirement is quoted so the
# shell passes `fastrl[dev]` to pip verbatim instead of treating `[...]` as a glob pattern.
# NOTE: IF YOU SEE VERSION ERRORS, IT IS SAFE TO IGNORE THEM. COLAB IS BEHIND IN SOME OF THE PACKAGE VERSIONS

In [2]:
#|hide
# `os` is imported explicitly: the `os.environ` lookup below must not depend on a star import
# happening to re-export it.
import os
from fastcore.imports import in_colab
# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.showdoc import *
    from nbdev.imports import *
    # Outside of test runs (no `IN_TEST` env var), check that we really are in a local notebook
    if not os.environ.get("IN_TEST", None):
        assert IN_NOTEBOOK
        assert not IN_COLAB
        assert IN_IPYTHON
else:
    # Virtual display is needed for colab
    from pyvirtualdisplay import Display
    display = Display(visible=0, size=(400, 300))
    display.start()

In [3]:
#|default_exp learner.core

In [4]:
#|export
# Python native modules
import os
# Third party libs
from fastcore.all import *
import torchdata.datapipes as dp
import torch
from fastai.torch_basics import *
from fastai.torch_core import *
from torch.utils.data.dataloader_experimental import DataLoader2
from torchdata.dataloader2.graph import find_dps,traverse
# Local modules
from fastrl.core import *
from fastrl.pipes.core import *
from fastrl.loggers.core import *

# Learner Core
> Core DataPipes for building Learners

In [15]:
#|export
class LearnerBase(dp.iter.IterDataPipe):
    def __init__(self,
            model:Module, # The base NN that we are getting raw action values out of.
            dls:List[DataLoader2], # The dataloaders to read data from for training
            loss_func=None, # The loss function to use
            opt=None, # The optimizer to use
            # LearnerBase will yield each dl individually by default. If `zipwise=True`
            # next() will be called on `dls` and will `yield next(dl1),next(dl2),next(dl1)...`
            zipwise:bool=False
    ):
        self.loss_func = loss_func
        self.opt = opt
        self.model = model
        self.iterable = dls
        self.zipwise = zipwise
        # Exposed under the same attribute name `LearnerHead` uses for its reference to this pipe
        self.learner_base = self
        # Read `limit` off the `Header` pipe found in the first dataloader's pipeline.
        # NOTE(review): assumes `dls` is non-empty and that pipeline contains a `Header` - confirm.
        self.batches = find_dp(traverse(dls[0].dataset),dp.iter.Header).limit

    def __iter__(self):
        # All iterators are created up front, before the first batch is requested
        dls = [iter(dl) for dl in self.iterable]
        if self.zipwise:
            # Round-robin: take one batch from every live dataloader per pass, drop the ones
            # that ran out, and stop once none are left.
            active = dls
            while active:
                still_active = []
                for dl in active:
                    # `StopIteration` must be caught here: letting it escape a generator
                    # turns it into a `RuntimeError` (PEP 479).
                    try: batch = next(dl)
                    except StopIteration: continue
                    still_active.append(dl)
                    yield batch
                active = still_active
        else:
            # Sequential: drain each dataloader completely before moving on to the next.
            # With no dataloaders this simply yields nothing.
            for dl in dls:
                for batch in dl:
                    yield batch

add_docs(
    LearnerBase,
    """Root `DataPipe` of a learner pipeline: holds the `model`, `loss_func`, `opt` and `dls`,
    and yields batches from `dls`, either one dataloader after another or, if `zipwise=True`,
    alternating between them until all are exhausted.
    """
) 

In [6]:
#|export
class LearnerHead(dp.iter.IterDataPipe):
    # Last pipe of a learner pipeline: iterating it iterates `source_datapipe`, and `fit`
    # drives that iteration after configuring the number of epochs.
    def __init__(self,source_datapipe):
        self.source_datapipe = source_datapipe
        # Keep a handle on the `LearnerBase` located in the upstream graph
        upstream_graph = traverse(source_datapipe)
        self.learner_base = find_dp(upstream_graph,LearnerBase)

    def __iter__(self):
        yield from self.source_datapipe

    def fit(self,epochs):
        # Locate the `EpocherCollector` in this pipeline and tell it how many epochs to run
        pipeline_graph = traverse(self)
        epoch_pipe = find_dp(pipeline_graph,EpocherCollector)
        epoch_pipe.epochs = epochs

        # Drain the pipeline; the yielded values themselves are not used here
        for _ in self:
            pass

add_docs(
    LearnerHead,
    """
    """,
    fit="Runs the `LearnerHead` pipeline for `epochs`"
)  

In [7]:
from torch.nn import *
from torch.optim import *
from fastai.torch_basics import *
from fastai.torch_core import *

class DQN(Module):
    "Small fully connected network: `Linear -> ReLU -> Linear`, mapping a state to one value per action."
    def __init__(self,
            state_sz:int, # Number of input features per state
            action_sz:int, # Number of outputs, one per action
            hidden:int=512 # Width of the single hidden layer
    ):
        # Explicit init so the class also works when `Module` is plain `torch.nn.Module`,
        # which refuses submodule assignment before `Module.__init__()` has run.
        super().__init__()
        self.layers=Sequential(
            Linear(state_sz,hidden),
            ReLU(),
            Linear(hidden,action_sz),
        )
    def forward(self,x):
        "Run `x` through the layer stack."
        return self.layers(x)

class SimpleModelRunner(dp.iter.IterDataPipe):
    def __init__(self,source_datapipe): 
        self.source_datapipe = source_datapipe
        self.agent_base = find_agent_base(self.source_datapipe)
    
    def __iter__(self):
        for o in self.source_datapipe: 
            try: yield self.agent_base.model(o)
            except Exception:
                print('Failed on ',o)
                raise

In [8]:
from fastrl.agents.core import *
# Seed torch before building the core NN
torch.manual_seed(0)
model = DQN(state_sz=4,action_sz=2)
# Assemble the agent pipeline from the inside out:
#   AgentBase         - wraps the model
#   SimpleModelRunner - what makes this agent unique: here it only passes each input
#                       straight through to the model
#   AgentHead         - the entry point that inputs are handed to before they run
#                       through the pipeline
agent = AgentHead(
    SimpleModelRunner(
        AgentBase(model,[])
    )
)

If we pass a list of tensors, we will get a list of actions:

In [9]:
# A list holding a single state tensor
single_state_batch = [tensor([1,2,3,4]).float()]
for action in agent(single_state_batch):
    print(action)

tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)


In [10]:
# The same state tensor, listed three times
repeated_state_batch = [tensor([1,2,3,4]).float()]*3
for action in agent(repeated_state_batch):
    print(action)

tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)
tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)
tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)


In [11]:
# from torchdata.datapipes.utils import to_graph
# to_graph(agent)

In [12]:
from fastrl.pipes.core import *
from fastrl.fastai.data.block import *
from fastrl.envs.gym import *

def baseline_test(envs,total_steps,seed=0):
    "Build a gym stepping pipeline over `envs`; return up to `total_steps` collected steps and the pipeline."
    # Each stage wraps the previous one
    env_pipe = dp.map.Mapper(envs)
    typed_pipe = TypeTransformLoop(env_pipe,[GymTypeTransform])
    iter_pipe = dp.iter.MapToIterConverter(typed_pipe)
    cached_pipe = dp.iter.InMemoryCacheHolder(iter_pipe)
    limited_pipe = cached_pipe.header(limit=10)
    pipe = GymStepper(limited_pipe,seed=seed)

    # Pairing with `range(total_steps)` caps how many steps are pulled from the pipeline
    steps = []
    for _,step in zip(range(total_steps),pipe):
        steps.append(step)
    return steps, pipe


In [13]:
# Request zero steps: the pipeline is built but nothing is collected from it
steps, pipe = baseline_test(envs=['CartPole-v1'],total_steps=0)
steps

[]

In [19]:
#|hide
from fastcore.imports import in_colab

# Colab still requires tornado<6, so nbdev is only imported (and the export only run)
# when we are not on colab
running_in_colab = in_colab()
if not running_in_colab:
    from nbdev import nbdev_export
    nbdev_export()