In [1]:
#|hide
#|eval: false
from fastcore.imports import in_colab
# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.showdoc import *
    from nbdev.imports import *
    # NOTE(review): `os`, IN_NOTEBOOK, IN_COLAB and IN_IPYTHON are not imported explicitly in
    # this cell; presumably they arrive via the star imports above — confirm.
    # The environment sanity checks are skipped when the IN_TEST environment variable is set.
    if not os.environ.get("IN_TEST", None):
        assert IN_NOTEBOOK
        assert not IN_COLAB
        assert IN_IPYTHON
else:
    # Virtual display is needed for colab
    from pyvirtualdisplay import Display
    display = Display(visible=0, size=(400, 300))
    display.start()

In [2]:
#|default_exp pipes.iter.nskip

In [3]:
#|export
# Python native modules
import os
# Third party libs
from fastcore.all import *
import torchdata.datapipes as dp
import typing
from fastai.torch_basics import *
from fastai.torch_core import *
# Local modules
from fastrl.core import *
from fastrl.pipes.core import *
from fastrl.pipes.iter.nstep import *
from fastrl.data.block import *

# NSkip
> DataPipe for skipping env steps env-wise.

In [4]:
#|export
# Error text raised by `NSkipper.__init__` when its source datapipe is an `NStepper`.
_msg = """
NSkipper should not go after NStepper. Please make the order:

```python
...
pipe = NSkipper(pipe,n=3)
pipe = NStepper(pipe,n=3)
...
```

"""

class NSkipper(dp.iter.IterDataPipe):
    # The class-level docstring is attached right after the class body via `add_docs`.
    def __init__(self, source_datapipe, n=1) -> None:
        """Store the source pipe and the skip interval.

        source_datapipe: iterable producing `StepType` steps; must not be an `NStepper`.
        n: keep every `n`-th step of each env (1 keeps every step).

        Raises `Exception` if `source_datapipe` is an `NStepper`, and `ValueError` if `n<1`.
        """
        if isinstance(source_datapipe,NStepper): raise Exception(_msg)
        # n==0 would otherwise only fail later, inside `__iter__`, with a ZeroDivisionError.
        if n<1: raise ValueError(f'n must be >= 1, got {n}')
        self.source_datapipe = source_datapipe
        self.n = n
        # Maps env_id -> number of steps seen so far in that env's current episode.
        self.env_buffer = {}

    def __iter__(self) -> typing.Iterator[typing.NamedTuple]:
        """Yield the kept steps: every `n`-th step per env, plus all 1st and terminated steps.

        Raises `Exception` when the source produces something that is not a `StepType`.
        """
        # Start every iteration with fresh per-env counters.
        self.env_buffer = {}
        for step in self.source_datapipe:
            if not issubclass(step.__class__,StepType):
                raise Exception(f'Expected typing.NamedTuple object got {type(step)}\n{step}')

            env_id,terminated,step_n = int(step.env_id),bool(step.terminated),int(step.step_n)

            # Count this step toward the env's current episode (an unseen env starts at 0).
            self.env_buffer[env_id] = self.env_buffer.get(env_id,0) + 1

            # Keep the step if it lands on the interval, ends the episode, or starts one.
            if self.env_buffer[env_id]%self.n==0 or terminated or step_n==1: yield step

            # Reset to 0 (not 1) so the next episode counts its 1st step as 1, exactly like
            # the env's first episode. Resetting to 1 shifted the kept steps of every later
            # episode by one and disagreed with `n_skips_expected` (e.g. for n=3).
            if terminated: self.env_buffer[env_id] = 0
            
# Supplies the class-level documentation for `NSkipper` after the class body
# (presumably via fastcore's `add_docs`, pulled in by the star imports — confirm).
add_docs(
    NSkipper,
    """Accepts a `source_datapipe` or iterable whose `next()` produces a `typing.NamedTuple` that
       skips N steps for individual environments *while always producing 1st steps and terminated steps.*
    """,
)

In [5]:
#|hide
# Silences the UserWarnings gym raises about the bounding box / action space format.
# There is probably a bug in the CartPole-v1 env that causes them. A working message regex
# could not be found, so the filter matches on the warning's line number (98) instead.
# NOTE(review): `warnings` is not imported explicitly in the visible cells; presumably it
# arrives via one of the star imports — confirm.
warnings.filterwarnings("ignore",category=UserWarning,lineno=98)

Below we skip every other step given 3 envs while always keeping the 1st and terminated steps.

In [6]:
import pandas as pd
from fastrl.envs.gym import GymTypeTransform,GymStepper

def n_skip_test(envs,total_steps,n=1,seed=0):
    """Build a gym stepping pipeline that ends in `NSkipper` and collect steps from it.

    envs: the items handed to `dp.map.Mapper` (the notebook passes gym env id strings).
    total_steps: upper bound on the number of steps collected from the pipeline.
    n: skip interval forwarded to `NSkipper`.
    seed: seed forwarded to `GymStepper`.

    Returns the collected steps as a list.
    """
    # Turn the env specs into an iterable, cached pipe that cycles endlessly.
    env_pipe = TypeTransformLoop(dp.map.Mapper(envs),[GymTypeTransform])
    env_pipe = dp.iter.InMemoryCacheHolder(dp.iter.MapToIterConverter(env_pipe)).cycle()

    # Step the envs, then drop the steps NSkipper decides to skip.
    skipped_pipe = NSkipper(GymStepper(env_pipe,seed=seed),n=n)

    # Pairing with `range` stops the otherwise endless pipe after `total_steps` items.
    collected = []
    for kept_step,_ in zip(skipped_pipe,range(total_steps)):
        collected.append(kept_step)
    return collected

# 3 CartPole envs, up to 200 collected steps, n=2, seed=0
steps = n_skip_test(['CartPole-v1']*3,200,2,0)
# Show the first 10 collected steps, limited to the columns relevant to skipping
pd.DataFrame(steps)[['state','next_state','env_id','terminated']][:10]

Unnamed: 0,state,next_state,env_id,terminated
0,"[tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)]","[tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)]",tensor(139998508568336),tensor(False)
1,"[tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)]","[tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)]",tensor(139998508622096),tensor(False)
2,"[tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)]","[tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)]",tensor(139998508623376),tensor(False)
3,"[tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)]","[tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)]",tensor(139998508568336),tensor(False)
4,"[tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)]","[tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)]",tensor(139998508622096),tensor(False)
5,"[tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)]","[tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)]",tensor(139998508623376),tensor(False)
6,"[tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)]","[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]",tensor(139998508568336),tensor(False)
7,"[tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)]","[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]",tensor(139998508622096),tensor(False)
8,"[tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)]","[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]",tensor(139998508623376),tensor(False)
9,"[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]","[tensor(0.0463), tensor(-0.0172), tensor(-0.1094), tensor(-0.1771)]",tensor(139998508568336),tensor(False)


In [7]:
# Same settings as the previous demo (n=2, seed=0, up to 200 steps) but with a single env
steps = n_skip_test(['CartPole-v1']*1,200,2,0)
# Show the first 10 collected steps, limited to the columns relevant to skipping
pd.DataFrame(steps)[['state','next_state','env_id','terminated']][:10]

Unnamed: 0,state,next_state,env_id,terminated
0,"[tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)]","[tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)]",tensor(139998507740880),tensor(False)
1,"[tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)]","[tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)]",tensor(139998507740880),tensor(False)
2,"[tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)]","[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]",tensor(139998507740880),tensor(False)
3,"[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]","[tensor(0.0463), tensor(-0.0172), tensor(-0.1094), tensor(-0.1771)]",tensor(139998507740880),tensor(False)
4,"[tensor(0.0459), tensor(-0.2106), tensor(-0.1129), tensor(0.0792)]","[tensor(0.0417), tensor(-0.4040), tensor(-0.1113), tensor(0.3342)]",tensor(139998507740880),tensor(False)
5,"[tensor(0.0336), tensor(-0.5973), tensor(-0.1047), tensor(0.5899)]","[tensor(0.0217), tensor(-0.4009), tensor(-0.0929), tensor(0.2661)]",tensor(139998507740880),tensor(False)
6,"[tensor(0.0137), tensor(-0.2046), tensor(-0.0875), tensor(-0.0543)]","[tensor(0.0096), tensor(-0.0083), tensor(-0.0886), tensor(-0.3733)]",tensor(139998507740880),tensor(False)
7,"[tensor(0.0094), tensor(0.1879), tensor(-0.0961), tensor(-0.6926)]","[tensor(0.0132), tensor(0.3842), tensor(-0.1099), tensor(-1.0139)]",tensor(139998507740880),tensor(False)
8,"[tensor(0.0209), tensor(0.5806), tensor(-0.1302), tensor(-1.3390)]","[tensor(0.0325), tensor(0.7771), tensor(-0.1570), tensor(-1.6694)]",tensor(139998507740880),tensor(False)
9,"[tensor(0.0480), tensor(0.9737), tensor(-0.1904), tensor(-2.0066)]","[tensor(0.0675), tensor(1.1702), tensor(-0.2305), tensor(-2.3517)]",tensor(139998507740880),tensor(True)


## NSkipper Tests

There are a couple properties that we expect from `NSkipper`:

    - The 1st step should always be returned.
    - The terminated step should always be returned.
    - Every env should have its own steps skipped/kept

First, `NSkipper(pipe,n=1)` should be identical to a pipeline that never used it.

In [8]:
import pandas as pd
from fastrl.envs.gym import GymTypeTransform,GymStepper

# Baseline: the same stages `n_skip_test` builds, minus the final `NSkipper` stage
pipe = dp.map.Mapper(['CartPole-v1']*3)
pipe = TypeTransformLoop(pipe,[GymTypeTransform])
pipe = dp.iter.MapToIterConverter(pipe)
pipe = dp.iter.InMemoryCacheHolder(pipe)
pipe = pipe.cycle()
pipe = GymStepper(pipe,seed=0)

# Collect up to 60 steps from the baseline and from the n=1 skipper pipeline (same envs, same seed)
no_n_skips = [step for step,_ in zip(*(pipe,range(60)))]
steps = n_skip_test(['CartPole-v1']*3,60,1,0)

If `n=1`, we should expect that, regardless of the number of envs, the n-skip pipeline and the
plain environment pipeline produce identical steps.

In [9]:
# Both runs should have the same length and identical values for every compared field.
# NOTE(review): `env_id` is left out of the comparison, presumably because each pipeline
# creates its own env instances with different ids — confirm.
test_len(steps,no_n_skips)
for field in ['next_state','state','terminated']:
    for i,(step,no_n_step) in enumerate(zip(steps,no_n_skips)): 
        test_eq(getattr(step,field),getattr(no_n_step,field))

In [10]:
#|hide
# pd.set_option('display.max_rows', 500)
# pd.DataFrame(steps)[['state','next_state','env_id','done']]
# pd.DataFrame(no_n_skips)[['state','next_state','env_id','done']]

In [11]:
#|export
def n_skips_expected(
    default_steps:int, # The number of steps the episode would run without n_skips
    n:int # The n-skip value that we are planning to use
):
    # All the steps will be retained including the 1st step. No offset needed
    if n==1: return default_steps
    # With no steps there is nothing to keep, and a lone step is both the 1st and the
    # terminated step, so it must only be counted once.
    if default_steps<=1: return max(default_steps,0)
    # `default_steps // n` steps land on a multiple of n; the 1st step is kept on top of those
    kept = (default_steps // n) + 1
    # If n does not go into default_steps evenly, the terminated step would have been skipped,
    # but it is always kept, so it counts as one extra step
    if default_steps%n!=0: kept += 1
    return kept

n_skips_expected.__doc__=r"""
Produces the expected number of steps, assuming a fully deterministic episode based on `default_steps` and `n`

Given `n=2`, given 1 envs, knowing that `CartPole-v1` when `seed=0` will always run 18 steps, the total 
steps will be:

$$
18 // n + 1 (1st+last)
$$

Episodes of 0 or 1 steps are returned unchanged, since a single step is both the 1st and the last step.
"""

In [12]:
expected_n_skips = n_skips_expected(default_steps=18,n=1)
print('Given the above values, we expect a single episode to be ',expected_n_skips,' steps long')
steps = n_skip_test(['CartPole-v1']*1,expected_n_skips+1,1,0)
# We collect one step more than a single episode yields, so `steps[-2]` should be the terminated
# final step of episode 1 (step_n 18) and `steps[-1]` the 1st step of episode 2.
test_eq(steps[-2].terminated,tensor([True]))
test_eq(steps[-2].episode_n,tensor([1]))
test_eq(steps[-2].step_n,tensor([18]))
test_eq(steps[-1].terminated,tensor([False]))
test_eq(steps[-1].episode_n,tensor([2]))
test_eq(steps[-1].step_n,tensor([1]))

Given the above values, we expect a single episode to be  18  steps long


In [13]:
expected_n_skips = n_skips_expected(default_steps=18,n=2)
print('Given the above values, we expect a single episode to be ',expected_n_skips,' steps long')
steps = n_skip_test(['CartPole-v1']*1,expected_n_skips+1,2,0)
# We collect one step more than a single episode yields, so `steps[-2]` should be the terminated
# final step of episode 1 (step_n 18) and `steps[-1]` the 1st step of episode 2.
test_eq(steps[-2].terminated,tensor([True]))
test_eq(steps[-2].episode_n,tensor([1]))
test_eq(steps[-2].step_n,tensor([18]))
test_eq(steps[-1].terminated,tensor([False]))
test_eq(steps[-1].episode_n,tensor([2]))
test_eq(steps[-1].step_n,tensor([1]))

Given the above values, we expect a single episode to be  10  steps long


In [14]:
expected_n_skips = n_skips_expected(default_steps=18,n=4)
print('Given the above values, we expect a single episode to be ',expected_n_skips,' steps long')
steps = n_skip_test(['CartPole-v1']*1,expected_n_skips+1,4,0)
# We collect one step more than a single episode yields, so `steps[-2]` should be the terminated
# final step of episode 1 (step_n 18) and `steps[-1]` the 1st step of episode 2.
test_eq(steps[-2].terminated,tensor([True]))
test_eq(steps[-2].episode_n,tensor([1]))
test_eq(steps[-2].step_n,tensor([18]))
test_eq(steps[-1].terminated,tensor([False]))
test_eq(steps[-1].episode_n,tensor([2]))
test_eq(steps[-1].step_n,tensor([1]))

Given the above values, we expect a single episode to be  6  steps long


In [15]:
expected_n_skips = n_skips_expected(default_steps=18,n=2)
print('Given the above values, we expect a single episode to be ',expected_n_skips,' steps long')
steps = n_skip_test(['CartPole-v1']*3,expected_n_skips*3+1,2,0)
# With 3 envs we collect three episodes' worth of kept steps plus one, so `steps[-2]` should be
# a terminated final step of an episode 1 (step_n 18) and `steps[-1]` the 1st step of an episode 2.
# NOTE(review): this assumes the 3 envs are stepped in turn so that every first episode
# finishes before any second episode starts — confirm.
test_eq(steps[-2].terminated,tensor([True]))
test_eq(steps[-2].episode_n,tensor([1]))
test_eq(steps[-2].step_n,tensor([18]))
test_eq(steps[-1].terminated,tensor([False]))
test_eq(steps[-1].episode_n,tensor([2]))
test_eq(steps[-1].step_n,tensor([1]))

Given the above values, we expect a single episode to be  10  steps long


In [16]:
#|hide
#|eval: false
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    # Presumably writes the `#|export` cells to the module named by `#|default_exp` — confirm.
    from nbdev import nbdev_export
    nbdev_export()

