In [1]:
#|hide
#|eval: false
# Shell guard: the installs below only run when the path /content exists
# (presumably a Colab runtime — confirm). apt-get output is discarded.
! [ -e /content ] && pip install -Uqq fastrl['dev'] pyvirtualdisplay && \
                     apt-get install -y xvfb python-opengl > /dev/null 2>&1 
# NOTE: IF YOU SEE VERSION ERRORS, IT IS SAFE TO IGNORE THEM. COLAB IS BEHIND IN SOME OF THE PACKAGE VERSIONS

In [2]:
#|hide
#|eval: false
from fastcore.imports import in_colab
# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.showdoc import *
    from nbdev.imports import *
    # Sanity-check the runtime flags unless the IN_TEST environment variable is set.
    # NOTE(review): `os` and the IN_* flags are not imported explicitly in this cell;
    # presumably they arrive through the star imports above — confirm.
    if not os.environ.get("IN_TEST", None):
        assert IN_NOTEBOOK
        assert not IN_COLAB
        assert IN_IPYTHON
else:
    # Virtual display is needed for colab
    from pyvirtualdisplay import Display
    display = Display(visible=0, size=(400, 300))
    display.start()

In [3]:
#|default_exp pipes.iter.firstlast

In [4]:
#|export
# Python native modules
import os
from warnings import warn
# Third party libs
from fastcore.all import *
import torchdata.datapipes as dp
import typing
from fastai.torch_basics import *
from fastai.torch_core import *
# Local modules
from fastrl.core import *
from fastrl.pipes.core import *
from fastrl.data.block import *
from fastrl.pipes.core import *

# FirstLast
> DataPipe for merging multiple `StepTypes` into a single one by keeping the first and last steps.

In [5]:
#|export
class FirstLastMerger(dp.iter.IterDataPipe):
    def __init__(self, 
                 source_datapipe, # Datapipe yielding a list/tuple of steps per item (e.g. the output of `NStepper`)
                 gamma:float=0.99 # Discount factor applied to the rewards of the later steps in each window
        ):
        self.source_datapipe = source_datapipe
        self.gamma = gamma
        
    def __iter__(self) -> StepType:
        "Yield one merged `SimpleStep` per incoming window; single-step windows are passed through untouched."
        # Not read anywhere in this class; kept so the instance attribute still exists for callers.
        self.env_buffer = {}
        for steps in self.source_datapipe:
            if not isinstance(steps,(list,tuple)):
                raise ValueError(f'Expected {self.source_datapipe} to return a list/tuple of steps, however got {type(steps)}')
                
            # Nothing to merge: forward the only step as-is.
            if len(steps)==1:
                yield steps[0]
                continue
                
            fstep,lstep = steps[0],steps[-1]
            
            # Discounted return over the window: r_0 + gamma*r_1 + gamma^2*r_2 + ...
            # Accumulate out-of-place: `reward *= ...` / `reward += ...` would modify
            # `fstep.reward` itself when it is a tensor, corrupting the source step.
            reward = fstep.reward
            discount = 1.0
            for step in steps[1:]:
                discount = discount*self.gamma
                reward = reward + discount*step.reward
                
            # State/action/episode/image come from the first step; everything describing
            # where the window ended up comes from the last step.
            yield SimpleStep(
                state=tensor(fstep.state),
                next_state=tensor(lstep.next_state),
                action=fstep.action,
                terminated=lstep.terminated,
                truncated=lstep.truncated,
                reward=reward,
                total_reward=lstep.total_reward,
                env_id=lstep.env_id,
                proc_id=lstep.proc_id,
                step_n=lstep.step_n,
                episode_n=fstep.episode_n,
                image=fstep.image
            )
                
# The class docstring is attached here rather than in the class body.
add_docs(
    FirstLastMerger,
    """Takes multiple steps and converts them into a single step consisting of properties
    from the first and last steps. Reward is recalculated to factor in the multiple steps.""",
)

In [6]:
#|hide
# Used here to avoid UserWarnings related to gym complaining about bounding box / action space format.
# There must be a bug in the CartPole-v1 env that is causing this to show. Also couldn't figure out the 
# regex, so instead we filter on the lineno, which is line 98.
# NOTE(review): only `warn` is imported explicitly from `warnings` in the import cell; the
# `warnings` module name presumably comes from one of the star imports — confirm.
warnings.filterwarnings("ignore",category=UserWarning,lineno=98)

Below we see an example where we collect 2 steps for each env, **then** yield them. This is useful for
training models on larger chunks of env step output.

In [7]:
import pandas as pd
from fastrl.envs.gym import GymTypeTransform,GymStepper
from fastrl.pipes.iter.nstep import *

def first_last_test(envs,total_steps,n=1,seed=0):
    """Build an env -> `GymStepper` -> `NStepper` -> `FirstLastMerger` pipeline and collect steps from it.

    envs: sequence handed to `dp.map.Mapper` (the notebook passes gym env ids).
    total_steps: number of merged steps to pull from the pipeline.
    n: window size forwarded to `NStepper`.
    seed: seed forwarded to `GymStepper`.

    Returns a list with at most `total_steps` items yielded by `FirstLastMerger`.
    """
    # Local import keeps this cell self-contained.
    from itertools import islice

    pipe = dp.map.Mapper(envs)
    pipe = TypeTransformLoop(pipe,[GymTypeTransform])
    pipe = dp.iter.MapToIterConverter(pipe)
    pipe = dp.iter.InMemoryCacheHolder(pipe)
    # Cycle over the envs so the pipeline keeps producing steps.
    pipe = pipe.cycle()
    pipe = GymStepper(pipe,seed=seed)
    pipe = NStepper(pipe,n=n)
    pipe = FirstLastMerger(pipe)

    # islice stops after exactly `total_steps` items; zip(pipe, range(total_steps))
    # would pull (and discard) one extra step from the pipeline before stopping.
    return list(islice(pipe,total_steps))

# Three CartPole envs, windows of 2 steps, 200 merged steps in total.
steps = first_last_test(['CartPole-v1']*3,total_steps=200,n=2,seed=0)
# Preview the first ten merged steps.
pd.DataFrame(steps)[['state','next_state','env_id','terminated']].head(10)

Unnamed: 0,state,next_state,env_id,terminated
0,"[tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)]","[tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)]",tensor(140680598067088),tensor(False)
1,"[tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)]","[tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)]",tensor(140680598068176),tensor(False)
2,"[tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)]","[tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)]",tensor(140680598134928),tensor(False)
3,"[tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)]","[tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)]",tensor(140680598067088),tensor(False)
4,"[tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)]","[tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)]",tensor(140680598068176),tensor(False)
5,"[tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)]","[tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)]",tensor(140680598134928),tensor(False)
6,"[tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)]","[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]",tensor(140680598067088),tensor(False)
7,"[tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)]","[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]",tensor(140680598068176),tensor(False)
8,"[tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)]","[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]",tensor(140680598134928),tensor(False)
9,"[tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)]","[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]",tensor(140680598067088),tensor(False)


First, `NStepper(pipe,n=1)` with `FirstLastMerger` should be identical to a pipeline that never used it.

In [8]:
import pandas as pd
from fastrl.envs.gym import GymTypeTransform,GymStepper

# Baseline: the same env pipeline as `first_last_test`, but without `NStepper`/`FirstLastMerger`.
pipe = TypeTransformLoop(dp.map.Mapper(['CartPole-v1']*3),[GymTypeTransform])
pipe = dp.iter.InMemoryCacheHolder(dp.iter.MapToIterConverter(pipe))
pipe = GymStepper(pipe.cycle(),seed=0)

# Ten raw steps from the baseline, and ten merged steps with n=1 from the same seed.
no_n_steps = [step for step,_ in zip(pipe,range(10))]
steps = first_last_test(['CartPole-v1']*3,total_steps=10,n=1,seed=0)

If `n=1` we should expect that regardless of the number of envs, both n-step and simple environment
pipelines should be identical.

In [9]:
# Both runs must have the same length and agree field-by-field.
test_len(steps,no_n_steps)
for field in ('next_state','state','terminated'):
    for i,(step,no_n_step) in enumerate(zip(steps,no_n_steps)):
        merged_value = getattr(step,field)
        baseline_value = getattr(no_n_step,field)
        test_eq(merged_value,baseline_value)

In [10]:
#|export
def n_first_last_steps_expected(
    default_steps:int, # The number of steps the episode would run without n_steps
) -> int:
    # Identity for now: the expected count is exactly the count passed in.
    expected_steps = default_steps
    return expected_steps

# The docstring is attached after the definition.
n_first_last_steps_expected.__doc__ = r"""
This function doesnt do much for now. `FirstLastMerger` pretty much undoes the number of steps `nsteps` does.
"""

In [11]:
expected_n_steps = n_first_last_steps_expected(default_steps=18)
print('Given the above values, we expect a single episode to be ',expected_n_steps,' steps long')
# One env, windows of 2: collect one row more than the episode length so the last row
# belongs to the next episode.
steps = first_last_test(['CartPole-v1']*1,total_steps=expected_n_steps+1,n=2,seed=0)
episode_end,next_episode_start = steps[-2],steps[-1]
# The second-to-last row should close episode 1 on step 18.
test_eq(episode_end.terminated,tensor([True]))
test_eq(episode_end.episode_n,tensor([1]))
test_eq(episode_end.step_n,tensor([18]))
# The last row should open episode 2.
test_eq(next_episode_start.terminated,tensor([False]))
test_eq(next_episode_start.episode_n,tensor([2]))
# Main difference: `step_n` is taken from the last step of the window, so it is 2 instead of 1.
test_eq(next_episode_start.step_n,tensor([2]))

Given the above values, we expect a single episode to be  18  steps long


In [12]:
expected_n_steps = n_first_last_steps_expected(default_steps=18)
print('Given the above values, we expect a single episode to be ',expected_n_steps,' steps long')
# One env, windows of 4: collect one row more than the episode length so the last row
# belongs to the next episode.
steps = first_last_test(['CartPole-v1']*1,total_steps=expected_n_steps+1,n=4,seed=0)
episode_end,next_episode_start = steps[-2],steps[-1]
# The second-to-last row should close episode 1 on step 18.
test_eq(episode_end.terminated,tensor([True]))
test_eq(episode_end.episode_n,tensor([1]))
test_eq(episode_end.step_n,tensor([18]))
# The last row should open episode 2; `step_n` comes from the last step of the window, hence 4.
test_eq(next_episode_start.terminated,tensor([False]))
test_eq(next_episode_start.episode_n,tensor([2]))
test_eq(next_episode_start.step_n,tensor([4]))

Given the above values, we expect a single episode to be  18  steps long


In [13]:
expected_n_steps = n_first_last_steps_expected(default_steps=18)
print('Given the above values, we expect a single episode to be ',expected_n_steps,' steps long')
# Three envs, windows of 2: collect a full episode's worth of rows per env plus one extra row.
steps = first_last_test(['CartPole-v1']*3,total_steps=expected_n_steps*3+1,n=2,seed=0)
episode_end,next_episode_start = steps[-2],steps[-1]
# The second-to-last row should close an episode 1 on step 18.
test_eq(episode_end.terminated,tensor([True]))
test_eq(episode_end.episode_n,tensor([1]))
test_eq(episode_end.step_n,tensor([18]))
# The last row should already belong to an episode 2; `step_n` comes from the last step of the window, hence 2.
test_eq(next_episode_start.terminated,tensor([False]))
test_eq(next_episode_start.episode_n,tensor([2]))
test_eq(next_episode_start.step_n,tensor([2]))

Given the above values, we expect a single episode to be  18  steps long


In [15]:
#|hide
#|eval: false
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    # Outside Colab: run nbdev's export step.
    from nbdev import nbdev_export
    nbdev_export()