In [None]:
#hide
#skip
! [ -e /content ] && pip install -Uqq fastrl['dev']  # upgrade fastrl on colab

In [None]:
# hide
# `os` is imported here explicitly so this cell does not depend on a star import (or a later
# cell) to bring it into scope when the notebook is run top to bottom on a fresh kernel.
import os
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.showdoc import *
    from nbdev.imports import *
    # Sanity-check the session flags unless the `IN_TEST` environment variable is set.
    if not os.environ.get("IN_TEST", None):
        assert IN_NOTEBOOK
        assert not IN_COLAB
        assert IN_IPYTHON

In [None]:
# export
# Python native modules
import os,warnings
# Third party libs
from fastcore.all import *
from fastai.torch_core import *
from fastai.basics import *
import pandas as pd
import torch
import numpy as np
# Local modules

In [None]:
# default_exp core

# Core
> Core libs for fastrl

## D
> A better dictionary

In [None]:
# export
def map_dict_ex(d,f,*args,gen=False,wise=None,**kwargs):
    """Lazily map `f` over the dict `d`.

    `f` may be a callable (bound with `*args`/`**kwargs` via `bind`), a format string
    (applied through `str.format`) or an indexable object (applied through `__getitem__`).
    With `wise=None` each `(key,value)` item is passed to `f`; with `wise='value'` only the
    values are transformed; any other `wise` transforms only the keys.
    """
    if callable(f):         fn=bind(f,*args,**kwargs)
    elif isinstance(f,str): fn=f.format
    else:                   fn=f.__getitem__

    if wise is None:  return map(fn,d.items())
    if wise=='value': return ((key,fn(val)) for key,val in d.items())
    return ((fn(key),val) for key,val in d.items())

Check that general mapping for dicts works nicely...

In [None]:
test_dict={'a':1,'b':2,'c':3}
# Without `wise`, the function receives every `(key,value)` item and returns the new item.
bump_item=lambda t:(t[0]+'_new',t[1]+1)
test_eq(dict(map_dict_ex(test_dict,bump_item)),{'a_new':2,'b_new':3,'c_new':4})

Check that key and value wise mapping works correctly...

In [None]:
# `wise` restricts the function to one side of each item and leaves the other side untouched.
renamed_keys=dict(map_dict_ex(test_dict,lambda k:k+'_new',wise='key'))
bumped_values=dict(map_dict_ex(test_dict,lambda v:v+1,wise='value'))
test_eq(renamed_keys,{'a_new':1,'b_new':2,'c_new':3})
test_eq(bumped_values,{'a':2,'b':3,'c':4})

In [None]:
# export
_error_msg='Found idxs: %s have values more than %s e.g.: %s'

class D(dict):
    "Improved version of `dict` with array handling abilities"
    def __init__(self,*args,mapping=False,**kwargs):
        self.mapping=mapping
        super().__init__(*args,**kwargs)
        
    def eq_k(self,o:'D',with_diff=False):
        eq=set(o.keys())==set(self.keys())
        if with_diff: return eq,set(o.keys()).symmetric_difference(set(self.keys()))
        return eq
    def _new(self,*args,**kwargs): return type(self)(*args,**kwargs)
    
    def map(self,f,*args,gen=False,**kwargs): 
        return (self._new,noop)[gen](map_dict_ex(self,f,*args,**kwargs),mapping=True)
    def mapk(self,f,*args,gen=False,wise='key',**kwargs):
        return self.map(f,*args,gen=gen,wise=wise,**kwargs)
    def mapv(self,f,*args,gen=False,wise='value',**kwargs):
        return self.map(f,*args,gen=gen,wise=wise,**kwargs)

In [None]:
test_dict=D({'a':1,'b':2,'c':3})
bump_item=lambda t:(t[0]+'_new',t[1]+1)

# Item-wise mapping, both eager (new `D`) and lazy (`gen=True`).
test_eq(test_dict.map(bump_item),{'a_new':2,'b_new':3,'c_new':4})
lazy_items=test_dict.map(bump_item,gen=True)
test_eq(isinstance(lazy_items,map),True)
test_eq(dict(test_dict.map(bump_item,gen=True)),{'a_new':2,'b_new':3,'c_new':4})

# Key-wise mapping.
add_suffix=lambda k:k+'_new'
test_eq(test_dict.mapk(add_suffix),{'a_new':1,'b_new':2,'c_new':3})
test_eq(dict(test_dict.mapk(add_suffix,gen=True)),{'a_new':1,'b_new':2,'c_new':3})

# Value-wise mapping (`wise='value'` is already the default of `mapv`).
add_one=lambda v:v+1
test_eq(test_dict.mapv(add_one),{'a':2,'b':3,'c':4})
test_eq(dict(test_dict.mapv(add_one,gen=True)),{'a':2,'b':3,'c':4})

`BD` is the primary data structure that `fastrl` uses. It allows for easily iterating and doing operations on steps attained from environments.

## BD 
> A batch wise dictionary that requires all values to be numpy,tensor, or None.

We need to change any indexer that is passed. We don't know if the indexer is going to
be a numpy array, slice, tensor, or int.
All we know is 2 things:
- If it is an int, the batch dim will disappear
- If it is an indexer, then the batch dim will stay, but be smaller

In [None]:
# export
def tensor2shape(k,t:Tensor):
    """Prepare tensor `t` for display in pandas under the column name `k`.

    A 2d tensor with a single column is returned as a flat numpy array of its values.
    Any other tensor is summarised as its shape string, repeated once per element of
    the first dimension.
    """
    if len(t.shape)==2 and t.shape[1]==1:
        # `detach().cpu()` lets tensors that require grad or live off-CPU be converted too;
        # a bare `t.numpy()` raises for those.
        return {k:t.detach().cpu().numpy().reshape(-1,)}
    return {k:[str(t.shape)]*t.shape[0]}

`tensor2shape` prepares tensors for display in pandas. For example, a tensor with
5 dimensions would be very hard to read if displayed in full, so only its shape is shown.

In [None]:
# One shape string per element of the first dimension.
shape_rows=tensor2shape('test',torch.randn(3,5))
test_eq(shape_rows,{'test':['torch.Size([3, 5])']*3})

If the tensor has only 1 channel, then we can show its literal value...

In [None]:
single_channel=torch.tensor([[1],[2],[3]])
test_eq(tensor2shape('test',single_channel)['test'],np.array([1, 2, 3]))

In [None]:
# export
def tensor2mu(k,t:Tensor):
    """Return a dict with key `{k}_mu` holding the mean of `t` for each batch item.

    Every dimension after the first is flattened and the mean is taken in double
    precision, so the value is a 1d tensor of length `t.shape[0]`.
    """
    flat=t.reshape(t.shape[0],-1).double()
    return {f'{k}_mu':flat.mean(axis=1)}

Outputs a dictionary that has the mean of the tensor. The returned dictionary's keys 
have the naming convention: *[k]_mu*.

In [None]:
o=torch.randn(3,5)
expected_mu=o.double().mean(axis=1)
test_eq(tensor2mu('test',o)['test_mu'],expected_mu)

In [None]:
# export
def obj2tensor(o):
    "Return `o` unchanged if it is a `Tensor`, otherwise build a `Tensor` from it (wrapping non-sequences in a list)"
    if isinstance(o,Tensor): return o
    if isinstance(o,(L,list,np.ndarray)): return Tensor(o)
    return Tensor([o])

After passing through `__init__`, all values are converted into tensors.
The expected batch size is 1, but it grows as `BD`s are merged/added.
Shapes are then converted according to 4 conditions:
```python
    - bs==1 and len shape < 2 and shape[0]==bs, then reshape(-1,1)
    - bs==1 and len shape < 2 and shape[0]!=bs, then unsqueeze(0)
    - bs==1 and len shape > 1 and shape[0]==bs, then pass
    - bs==1 and len shape > 1 and shape[0]!=bs, then unsqueeze(0)
```
If bs!=1 then `BD` will have limited reshaping behavior. 
```python
    - bs!=1 and len shape < 2 and shape[0]==bs, then reshape(-1,1)
    - bs!=1 and len shape < 2 and shape[0]!=bs, then raise error
    - bs!=1 and len shape > 1 and shape[0]==bs, then pass
    - bs!=1 and len shape > 1 and shape[0]==1 and shape[1]==bs, then squeeze(0)
    - bs!=1 and len shape > 1 and shape[0],shape[1]!=bs, then raise error
```

In [None]:
# export
class UnCollateble(Exception):
    "Raised by `collate` when a tensor's shape cannot be reconciled with the batch size"

def collate(v,bs):
    """Reshape tensor `v` so that its first dimension is the batch dimension of size `bs`.

    - 0-dim `v`: becomes shape `(1,1)` when `bs==1`, otherwise an error.
    - `v.shape[0]==bs`: a 1d `v` becomes a `(bs,1)` column, anything larger is returned as is.
    - `bs==1` and `v.shape[0]!=1`: a batch dimension is added in front.
    - `bs!=1` and `v.shape[:2]==(1,bs)`: the leading singleton dimension is dropped.

    Raises `UnCollateble` for every other combination, so callers never receive `None`.
    """
    ndim=len(v.shape)
    if ndim==0:
        # A scalar tensor can only ever be a single step with a single channel.
        if bs==1: return v.reshape(1,1)
    elif v.shape[0]==bs:
        return v.reshape(-1,1) if ndim==1 else v
    elif bs==1:
        return v.unsqueeze(0)
    elif ndim>1 and v.shape[0]==1 and v.shape[1]==bs:
        return v.squeeze(0)
    raise UnCollateble(f'Cant handle {v} since bs:{bs} shape:{v.shape} are incompatible')

class BD(D):
    """A batch-wise `D`: values are converted to tensors and collated to the batch size `bs`.

    `bs=None` infers the batch size from the first dimension of the first value.
    Adding two instances stacks their values row-wise and sums their batch sizes.
    """
    def __init__(self,*args,v=False,bs=1,**kwargs):
        # `self.bs` is read below, so `store_attr` is relied on to set it from the arguments.
        # NOTE(review): `v` is accepted but not used anywhere in this class — confirm intent.
        store_attr()
        super().__init__(*args,**kwargs)
        # Instances created with `mapping=True` (as `D.map` does) keep their values untouched.
        if not self.mapping: self.update(self.mapv(obj2tensor))
        if self.bs is None: self.bs=list(self.values())[0].shape[0]
        if not self.mapping: self.update(self.mapv(collate,bs=self.bs))

    def __radd__(self,o):
        "Reflected add: an int on the left (e.g. the `0` that `sum` starts with) returns `self`"
        if isinstance(o,int): return self
        # `o + self`: the rows of `o` come first.
        return type(self)({k:torch.vstack((o[k],self[k])) for k in self},bs=o.bs+self.bs)

    def __add__(self,o):
        "Stack the values of `o` below those of `self`, key by key"
        # `type(self)` keeps subclasses intact, matching `__getitem__` and `_new`.
        return type(self)({k:torch.vstack((self[k],o[k])) for k in self},bs=self.bs+o.bs)

    def __getitem__(self,o):
        "An int, listy or `Tensor` index selects batch rows from every value; other keys are a plain lookup"
        if isinstance(o,int) or is_listy(o) or isinstance(o,Tensor):
            # An int index drops the batch dim, which `collate` restores with `bs=1`;
            # for any other indexer the batch size is re-inferred from the result.
            bs=1 if isinstance(o,int) else None
            return type(self)({k:self[k][o] for k in self},bs=bs)
        return super().__getitem__(o)

    @classmethod
    def merge(cls,*ds,**kwargs):
        "Build an instance from the result of the module-level `merge(*ds)`; `kwargs` go to the constructor"
        return cls(merge(*ds),**kwargs)

    @delegates(pd.DataFrame)
    def pandas(self,mu=False,**kwargs):
        "Turns a `BD` into a pandas Dataframe optionally showing `mu` of values."
        shape_cols=tuple(tensor2shape(k,v) for k,v in self.items())
        mu_cols=tuple(tensor2mu(k,v) for k,v in self.items()) if mu else ()
        return pd.DataFrame(merge(*shape_cols,*mu_cols),**kwargs)

Probably the most important thing for `BD` is to allow as many ways of initializing as possible.

In [None]:
# Typical dictionary with an image field.
def example_1():
    "Typical single-step dictionary with an image field and no batch dimension."
    return {'state':np.random.rand(4,),'next_state':torch.rand(4,),
            'action':[np.random.randint(0,2)],'reward':[np.random.ranf()],
            'steps':[np.random.randint(0,20)],'episode_reward':[np.random.randint(5,40)],
            'env':[np.random.randint(5,40)],'done':[np.random.randint(0,2)==0],
            'image':torch.rand(5,5,3)}

def example_2():
    "Single step where some fields already carry a leading batch dimension of 1."
    return {'state':np.random.rand(1,4),'next_state':torch.rand(1,4),
            'action':L([np.random.randint(0,2)]),
            'reward':L([np.random.ranf()]),
            'steps':L([np.random.randint(0,20)]),
            'episode_reward':L([np.random.randint(5,40)]),
            'env':L([np.random.randint(5,40)]),
            'done':L([np.random.randint(0,2)==0]),
            'image':torch.rand(1,5,5,3)}

def example_3():
    "Single dictionary that already holds a batch of 3 steps."
    return {'state':np.random.rand(3,4),'next_state':torch.rand(3,4),
            'action':L.range(3).map(lambda _:np.random.randint(0,2)),
            'reward':L.range(3).map(lambda _:np.random.ranf()),
            'steps':L.range(3).map(lambda _:np.random.randint(0,20)),
            'episode_reward':L.range(3).map(lambda _:np.random.randint(5,40)),
            'env':L.range(3).map(lambda _:np.random.randint(5,40)),
            'done':L.range(3).map(lambda _:np.random.randint(0,2)==0),
            'image':torch.rand(3,5,5,3)}

# Label -> factory, so the cells below can loop over every example.
functions={'example 1':example_1,'example 2':example_2,'example 3':example_3}

Let's check what happens when a single dict is passed into the `__init__`

In [None]:
# Each example dict is passed positionally, using the default batch size.
for label,make_example in functions.items():
    print(label)
    frame=BD(make_example()).pandas()
    display(frame)

example 1


Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([1, 4])","torch.Size([1, 4])",1.0,0.316291,2.0,36.0,30.0,1.0,"torch.Size([1, 5, 5, 3])"


example 2


Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([1, 4])","torch.Size([1, 4])",0.0,0.150571,18.0,7.0,8.0,0.0,"torch.Size([1, 5, 5, 3])"


example 3


Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([1, 3, 4])","torch.Size([1, 3, 4])","torch.Size([1, 3])","torch.Size([1, 3])","torch.Size([1, 3])","torch.Size([1, 3])","torch.Size([1, 3])","torch.Size([1, 3])","torch.Size([1, 3, 5, 5, 3])"


Hm, it seems the shape above is not what we want. `BD` will only do limited reshaping/fixing.
Although it could possibly check whether all the shapes share the same batch size, it is likely better
for the user to define specifically what batch size they are expecting.

In [None]:
# Declaring the batch size makes each of the 3 rows its own step.
explicit_bs_bd=BD(example_3(),bs=3)
explicit_bs_bd.pandas()

Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([3, 4])","torch.Size([3, 4])",0.0,0.747427,15.0,18.0,38.0,1.0,"torch.Size([3, 5, 5, 3])"
1,"torch.Size([3, 4])","torch.Size([3, 4])",0.0,0.958449,5.0,19.0,14.0,0.0,"torch.Size([3, 5, 5, 3])"
2,"torch.Size([3, 4])","torch.Size([3, 4])",1.0,0.816427,4.0,13.0,13.0,1.0,"torch.Size([3, 5, 5, 3])"


As expected, passing dicts into `BD` kwargs should also work...

In [None]:
# Same examples, this time unpacked into keyword arguments.
for label,make_example in functions.items():
    print(label)
    frame=BD(**make_example()).pandas()
    display(frame)

example 1


Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([1, 4])","torch.Size([1, 4])",0.0,0.607529,3.0,15.0,24.0,1.0,"torch.Size([1, 5, 5, 3])"


example 2


Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([1, 4])","torch.Size([1, 4])",1.0,0.591736,5.0,38.0,38.0,1.0,"torch.Size([1, 5, 5, 3])"


example 3


Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([1, 3, 4])","torch.Size([1, 3, 4])","torch.Size([1, 3])","torch.Size([1, 3])","torch.Size([1, 3])","torch.Size([1, 3])","torch.Size([1, 3])","torch.Size([1, 3])","torch.Size([1, 3, 5, 5, 3])"


Passing in itemwise tuples should also not be a problem...

In [None]:
# Same examples, this time supplied as a generator of `(key,value)` pairs.
for label,make_example in functions.items():
    print(label)
    item_pairs=((key,val) for key,val in make_example().items())
    display(BD(item_pairs).pandas())

example 1


Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([1, 4])","torch.Size([1, 4])",0.0,0.567727,17.0,24.0,13.0,0.0,"torch.Size([1, 5, 5, 3])"


example 2


Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([1, 4])","torch.Size([1, 4])",0.0,0.133824,13.0,33.0,13.0,0.0,"torch.Size([1, 5, 5, 3])"


example 3


Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([1, 3, 4])","torch.Size([1, 3, 4])","torch.Size([1, 3])","torch.Size([1, 3])","torch.Size([1, 3])","torch.Size([1, 3])","torch.Size([1, 3])","torch.Size([1, 3])","torch.Size([1, 3, 5, 5, 3])"


In [None]:
for k,f in functions.items():
    # Build the batch from the example being iterated (`f`); always calling `example_1`
    # made all three labelled outputs show the same kind of data.
    original_bd=sum((BD(f()) for _ in range(20)))
    print(k)
    # A list index keeps the batch dimension, so 3 rows are expected back.
    display(original_bd[[2,5,8]].pandas())

example 1


Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([3, 4])","torch.Size([3, 4])",1.0,0.778721,18.0,11.0,16.0,1.0,"torch.Size([3, 5, 5, 3])"
1,"torch.Size([3, 4])","torch.Size([3, 4])",0.0,0.240419,14.0,8.0,38.0,0.0,"torch.Size([3, 5, 5, 3])"
2,"torch.Size([3, 4])","torch.Size([3, 4])",1.0,0.959504,18.0,7.0,11.0,0.0,"torch.Size([3, 5, 5, 3])"


example 2


Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([3, 4])","torch.Size([3, 4])",1.0,0.457843,16.0,27.0,10.0,0.0,"torch.Size([3, 5, 5, 3])"
1,"torch.Size([3, 4])","torch.Size([3, 4])",1.0,0.011371,7.0,27.0,20.0,1.0,"torch.Size([3, 5, 5, 3])"
2,"torch.Size([3, 4])","torch.Size([3, 4])",0.0,0.826068,2.0,38.0,9.0,1.0,"torch.Size([3, 5, 5, 3])"


example 3


Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([3, 4])","torch.Size([3, 4])",1.0,0.681567,8.0,28.0,37.0,1.0,"torch.Size([3, 5, 5, 3])"
1,"torch.Size([3, 4])","torch.Size([3, 4])",0.0,0.078813,3.0,5.0,29.0,0.0,"torch.Size([3, 5, 5, 3])"
2,"torch.Size([3, 4])","torch.Size([3, 4])",0.0,0.940886,17.0,36.0,11.0,1.0,"torch.Size([3, 5, 5, 3])"


It is also important that the batch size makes sense after summing...

In [None]:
# Summing `n_steps` single-step `BD`s should give a batch size of `n_steps`.
n_steps=20
test_d=sum(BD(example_1()) for _ in range(n_steps))
test_eq(test_d.bs,n_steps)

Check that indexing works as well as the adjusting of the batch size...

In [None]:
# A string key is a plain lookup; an int or slice returns a new `BD`.
test_eq(type(test_d['state']),Tensor)
test_eq(type(test_d[5]),BD)
window=test_d[5:8]
test_eq(window.bs,3)
window.pandas()

Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([3, 4])","torch.Size([3, 4])",1.0,0.883429,5.0,8.0,24.0,1.0,"torch.Size([3, 5, 5, 3])"
1,"torch.Size([3, 4])","torch.Size([3, 4])",0.0,0.335055,13.0,18.0,37.0,0.0,"torch.Size([3, 5, 5, 3])"
2,"torch.Size([3, 4])","torch.Size([3, 4])",1.0,0.352697,19.0,17.0,32.0,1.0,"torch.Size([3, 5, 5, 3])"


In [None]:
first_eight=test_d[:8]
first_eight.pandas()

Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([8, 4])","torch.Size([8, 4])",1.0,0.769159,9.0,16.0,21.0,0.0,"torch.Size([8, 5, 5, 3])"
1,"torch.Size([8, 4])","torch.Size([8, 4])",1.0,0.665282,7.0,22.0,35.0,1.0,"torch.Size([8, 5, 5, 3])"
2,"torch.Size([8, 4])","torch.Size([8, 4])",1.0,0.773795,0.0,18.0,16.0,1.0,"torch.Size([8, 5, 5, 3])"
3,"torch.Size([8, 4])","torch.Size([8, 4])",1.0,0.780645,17.0,21.0,17.0,1.0,"torch.Size([8, 5, 5, 3])"
4,"torch.Size([8, 4])","torch.Size([8, 4])",0.0,0.462467,6.0,18.0,27.0,0.0,"torch.Size([8, 5, 5, 3])"
5,"torch.Size([8, 4])","torch.Size([8, 4])",1.0,0.883429,5.0,8.0,24.0,1.0,"torch.Size([8, 5, 5, 3])"
6,"torch.Size([8, 4])","torch.Size([8, 4])",0.0,0.335055,13.0,18.0,37.0,0.0,"torch.Size([8, 5, 5, 3])"
7,"torch.Size([8, 4])","torch.Size([8, 4])",1.0,0.352697,19.0,17.0,32.0,1.0,"torch.Size([8, 5, 5, 3])"


In [None]:
# Index tensors work as well.
row_idx=Tensor([0]).long()
test_d[row_idx].pandas()

Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([1, 4])","torch.Size([1, 4])",1.0,0.769159,9.0,16.0,21.0,0.0,"torch.Size([1, 5, 5, 3])"


In [None]:
# Re-wrapping an existing batch with its true batch size leaves the shapes alone.
rewrapped=BD(test_d,bs=20)
test_eq(rewrapped['state'].shape,(20,4))

In [None]:
# hide
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.export import *
    from nbdev.export2html import *
    from nbdev.cli import make_readme
    # Outside colab: run the nbdev README / script / html generation steps, in this order.
    make_readme()
    notebook2script()
    notebook2html()

converting /home/fastrl_user/fastrl/nbs/index.ipynb to README.md
Converted 00_core.ipynb.
Converted 00_nbdev_extension.ipynb.
Converted 05_data.block.ipynb.
Converted 05_data.test_async.ipynb.
Converted 20_test_utils.ipynb.
Converted index.ipynb.
Converted nbdev_template.ipynb.
converting: /home/fastrl_user/fastrl/nbs/00_core.ipynb
