In [1]:
#|hide
#|eval: false
# Runs only when `/content` exists (presumably a Colab runtime — TODO confirm): pip-installs
# fastrl with its `dev` extra plus pyvirtualdisplay, then apt-installs xvfb and python-opengl.
# Only the apt-get output is discarded; pip runs with `-qq`.
! [ -e /content ] && pip install -Uqq fastrl['dev'] pyvirtualdisplay && \
                     apt-get install -y xvfb python-opengl > /dev/null 2>&1 
# NOTE: IF YOU SEE VERSION ERRORS, IT IS SAFE TO IGNORE THEM. COLAB IS BEHIND IN SOME OF THE PACKAGE VERSIONS

In [2]:
#|hide
#|eval: false
# `os` is imported here explicitly so this cell does not depend on a star import (or a later
# cell) to provide it when run on a fresh kernel.
import os

from fastcore.imports import in_colab
# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.showdoc import *
    from nbdev.imports import *
    # Sanity-check the runtime flags unless the IN_TEST environment variable is set.
    if not os.environ.get("IN_TEST", None):
        assert IN_NOTEBOOK
        assert not IN_COLAB
        assert IN_IPYTHON
else:
    # Virtual display is needed for colab
    from pyvirtualdisplay import Display
    # NOTE(review): this binds the notebook-level name `display`; confirm nothing later
    # expects IPython's `display` function under that name.
    display = Display(visible=0, size=(400, 300))
    display.start()

In [3]:
#|default_exp agents.core

In [4]:
#|export
# Python native modules
import os
# Third party libs
from fastcore.all import *
import torchdata.datapipes as dp
import torch

from fastrl.torch_core import *
from torchdata.dataloader2.graph import find_dps,traverse
# Local modules
from fastrl.core import *
from fastrl.pipes.core import *

# Agent Core
> Minimum Agent DataPipes, objects, and utilities

In [5]:
#|export
class AgentBase(dp.iter.IterDataPipe):
    # The class docstring is attached by the `add_docs` call that follows the class.
    def __init__(
        self,
        model:Module, # The base NN that raw action values are read from.
        action_iterator:list=None, # A reference to a list of items to process; a new empty list is used when omitted.
        logger_bases=None # Stored on the instance as-is; not used inside this class.
    ):
        self.model = model
        self.logger_bases = logger_bases
        # Carries an `agent_base` attribute like the other agent pipes; here it is the object itself.
        self.agent_base = self
        # Keep the caller's list object (not a copy) so items added to it later are seen here.
        self.iterable = [] if action_iterator is None else action_iterator

    def __iter__(self):
        # Hand out items from the front of the list until it is empty.
        while True:
            if not self.iterable: break
            yield self.iterable.pop(0)
            
# Attach the class-level docstring to `AgentBase` after its definition.
# (`add_docs` is presumably fastcore's helper, brought in by the star import — TODO confirm.)
add_docs(
    AgentBase,
    """Acts as the footer of the Agent pipeline. 
    Maintains important state such as the `model` being used for get actions from.
    Also optionally allows passing a reference list of `action_iterator` which is a
    persistent list of actions for the entire agent pipeline to process through.
    
    > Important: Must be at the start of the pipeline, and be used with AgentHead at the end.
    
    > Important: `action_iterator` is stored in the `iterable` field. However the recommended
    way of passing actions to the pipeline is to call an `AgentHead` instance.
    """
    
) 

In [6]:
#|export               
class AgentHead(dp.iter.IterDataPipe):
    # Class and public-method docstrings are attached by the `add_docs` call that follows the class.
    def __init__(self,source_datapipe):
        self.source_datapipe = source_datapipe
        # Search the upstream graph for the `AgentBase` whose `iterable` list `__call__` appends to.
        upstream_graph = traverse(self.source_datapipe)
        self.agent_base = find_dp(upstream_graph,AgentBase)

    def __call__(self,steps:list):
        # A bare step object (instead of a list of them) is rejected before anything is queued.
        got_bare_step = issubclass(steps.__class__,StepType)
        if got_bare_step:
            raise Exception(f'Expected List[{StepType}] object got {type(steps)}\n{steps}')
        queue = self.agent_base.iterable
        queue.extend(steps)
        # Returning `self` allows `for action in head(steps): ...`.
        return self

    def __iter__(self):
        yield from self.source_datapipe

    def augment_actions(self,actions):
        return actions

    def create_step(self,**kwargs):
        return SimpleStep(**kwargs)
    
# Attach the class docstring and the docstrings of `augment_actions` / `create_step`
# to `AgentHead` after its definition.
# NOTE(review): in the `augment_actions` text, "would be compat with the env" may have been
# intended as "would not be compatible" — confirm with the author before rewording.
add_docs(
    AgentHead,
    """Acts as the head of the Agent pipeline. 
    Used for conveniently adding actions to the pipeline to process.
    
    > Important: Must be paired with `AgentBase`
    """,
    augment_actions="""Called right before being fed into the env. 
    
    > Important: The results of this function will not be kept / used in the step or forwarded to 
    any training code.

    There are cases where either the entire action shouldn't be fed into the env,
    or the version of the action that we want to train on would be compat with the env.
    
    This is also useful if we want to train on the original raw values of the action prior to argmax being run on it for example.
    """,
    create_step="Creates the step used by the env for running, and used by the model for training."
)  

In [7]:
#|export
class SimpleModelRunner(dp.iter.IterDataPipe):
    "Feeds every item from `source_datapipe` through the single `model` held by the upstream `AgentBase`."
    def __init__(self,
                 source_datapipe, # Upstream pipe that produces the model inputs
                 device:Optional[str]=None # When given, each input is moved to this device first
                ):
        self.source_datapipe = source_datapipe
        self.device = device
        upstream_graph = traverse(self.source_datapipe)
        self.agent_base = find_dp(upstream_graph,AgentBase)

    def __iter__(self):
        for x in self.source_datapipe:
            if self.device is not None:
                target = torch.device(self.device)
                x = x.to(target)
            # A rank-1 input gets a leading axis of size 1 before it reaches the model.
            if len(x.shape)==1:
                x = x.unsqueeze(0)
            yield self.agent_base.model(x)

Check that the 1x4 tensor successfully pushes through the model and that we get the expected outputs...

In [8]:
# Seed torch's RNG before anything below is created.
torch.manual_seed(0)

# Import order is kept as-is: names from the later star import replace same-named ones
# that came from `torch.nn`.
from torch.nn import *
from fastrl.torch_core import *

class DQN(Module):
    "Small fully connected network: `state_sz` inputs -> `hidden` units (ReLU) -> `action_sz` outputs."
    def __init__(self,state_sz:int,action_sz:int,hidden=512):
        # NOTE(review): no `super().__init__()` call here — presumably the `Module` in scope
        # takes care of that; confirm.
        stack = [
            Linear(state_sz,hidden),
            ReLU(),
            Linear(hidden,action_sz),
        ]
        self.layers = Sequential(*stack)

    def forward(self,x):
        return self.layers(x)


In [9]:
# `DQN` is the notebook-local class defined above (the import from
# `fastrl.agents.dqn.basic` is intentionally left out).
# Seed first, then build the core NN with 4 inputs and 2 outputs.
torch.manual_seed(0)
model = DQN(4,2)
# Agent pipeline, read inside-out: AgentBase -> SimpleModelRunner -> AgentHead
agent = AgentHead(SimpleModelRunner(AgentBase(model)))

In [10]:
# A single 4-element float tensor used as the model input.
input_tensor = tensor([1,2,3,4]).float()

# Calling the head appends the list's items to the base's list; iterating then prints one
# model output per item.
for action in agent([input_tensor]):
    print(action)
    
# The tensor that was passed in must still hold its original values afterwards.
test_eq(input_tensor,tensor([1., 2., 3., 4.]))

tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)


In [11]:
#|export
class StepFieldSelector(dp.iter.IterDataPipe):
    "Yields the `field` attribute of every step coming out of `source_datapipe`."
    def __init__(self,
         source_datapipe, # datapipe whose items are `StepType` objects
         field='state' # Name of the `StepType` attribute to pass on
        ):
        # TODO: support multi-fields
        self.field = field
        self.source_datapipe = source_datapipe

    def __iter__(self):
        for step in self.source_datapipe:
            is_step = issubclass(step.__class__,StepType)
            if is_step:
                yield getattr(step,self.field)
            else:
                # Anything that is not a `StepType` stops the iteration with an error.
                raise Exception(f'Expected typing.NamedTuple object got {type(step)}\n{step}')

Check that using `StepFieldSelector`, we can grab the `state` field from the `SimpleStep` to push through the model...

In [12]:
# Same pipeline as before, with a `StepFieldSelector` that pulls `state` out of each step
# before it reaches the model runner.
agent = AgentHead(
    SimpleModelRunner(
        StepFieldSelector(AgentBase(model),field='state')
    )
)

# One step with an explicit `state` goes in; one model output is printed.
for action in agent([SimpleStep.random(state=tensor([1.,2.,3.,4.]))]):
    print(action)

tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)


In [13]:
# Set up the core NN (seeded first)
torch.manual_seed(0)
model = DQN(4,2)
# Read inside-out:
#  - `AgentBase` (given an explicit empty list) sits at the start of the pipeline,
#  - `SimpleModelRunner` is the only agent-specific stage here and passes each input
#    directly through to the model,
#  - `AgentHead` brings it together: inputs are handed to it and run through the pipeline.
agent = AgentHead(SimpleModelRunner(AgentBase(model,[])))

If we pass a list of tensors, we will get a list of actions:

In [14]:
# One input in the list -> one model output printed.
for action in agent([tensor([1,2,3,4]).float()]):
    print(action)

tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)


In [15]:
# The same input three times in one call -> three model outputs printed.
for action in agent([tensor([1,2,3,4]).float()]*3):
    print(action)
traverse(agent); # Check that we can traverse it

tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)
tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)
tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)


In [16]:
from fastrl.pipes.core import *
from fastrl.pipes.map.transforms import *
from fastrl.data.block import *
from fastrl.envs.gym import *

def baseline_test(envs,total_steps,seed=0):
    "Chain `envs` through `TypeTransformer`, an in-memory cache, `cycle` and `GymStepper`; return up to `total_steps` collected steps and the final pipe."
    env_map = dp.map.Mapper(envs)
    typed_envs = TypeTransformer(env_map,[GymTypeTransform])
    env_iter = dp.iter.MapToIterConverter(typed_envs)
    cached_envs = dp.iter.InMemoryCacheHolder(env_iter)
    stepper = GymStepper(cached_envs.cycle(),seed=seed)

    # Pairing with `range(total_steps)` caps how many steps are pulled from the pipe.
    steps = []
    for _,step in zip(range(total_steps),stepper):
        steps.append(step)
    return steps, stepper


In [17]:
# With `total_steps=0` nothing is collected (`steps` is an empty list); the call only builds `pipe`.
steps, pipe = baseline_test(['CartPole-v1'],0)

In [18]:
#|export
class StepModelFeeder(dp.iter.IterDataPipe):
    def __init__(self,
                 source_datapipe, # next() must produce a `StepType`,
                 keys:List[str] # A list of field names to grab and push into `self.agent_base.model`
                ): 
        self.source_datapipe = source_datapipe
        self.keys = keys
        # Locate the `AgentBase` the same way the other agent pipes in this module do.
        self.agent_base = find_dp(traverse(self.source_datapipe),AgentBase)

    def __iter__(self):
        for o in self.source_datapipe: 
            # Reject anything that is not a step before reading fields from it.
            if not issubclass(o.__class__,StepType):
                raise Exception(f'Expected {StepType} object got {type(o)}\n{o}')
            
            # The selected fields are handed to the model together, as one tuple.
            tensors = tuple(getattr(o,k) for k in self.keys)
            
            # Only the model call is guarded, so the "Failed on" line is printed for model
            # failures and not for exceptions raised by the consumer at the `yield`.
            try: output = self.agent_base.model(tensors)
            except Exception:
                print('Failed on ',o)
                raise
            yield output
        
add_docs(
    StepModelFeeder,
    """Grabs the `keys` fields from each `StepType` and feeds them, as a tuple, 
    into `self.agent_base.model`, yielding the model's output.

    Raises an `Exception` for any item that is not a `StepType`.
    """
)  
    

In [19]:
#|hide
#|eval: false
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    # Outside Colab, run nbdev's export (presumably writing the `#|export` cells to the
    # module named by `#|default_exp` — TODO confirm).
    from nbdev import nbdev_export
    nbdev_export()

