In [None]:
#hide
#skip
# Runs the install only when /content exists (the Colab case, per the inline comment); -U upgrades, -qq silences pip.
! [ -e /content ] && pip install -Uqq fastrl['dev']  # upgrade fastrl on colab

In [None]:
# hide
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.showdoc import *
    from nbdev.imports import *
    # NOTE(review): `os` and the IN_* flags are not imported explicitly in this cell; presumably
    # they are provided by the nbdev star imports above — confirm.
    # When IN_TEST is unset, require a notebook/IPython session that is not Colab.
    if not os.environ.get("IN_TEST", None):
        assert IN_NOTEBOOK
        assert not IN_COLAB
        assert IN_IPYTHON

In [None]:
# default_exp core

In [None]:
# export
# Python native modules
import os,warnings
# Third party libs
from fastcore.all import *
from fastai.torch_core import *
from fastai.basics import *
import pandas as pd
import torch
import numpy as np
# Local modules

In [None]:
# hide

# Core
> Core libs for fastrl

## D
> A better dictionary

In [None]:
# export
def isnone(v):
    "Return `True` only when `v` is the `None` singleton."
    return v is None

In [None]:
# export
def map_dict_ex(d,f,*args,gen=False,wise=None,**kwargs):
    """Map `f` over the dict `d`, item-wise, key-wise or value-wise.

    `f` may be a callable (bound to `args`/`kwargs` through `bind`), a format
    string (applied with `str.format`) or any other object (applied through its
    `__getitem__`).

    - `wise=None`: the transform receives each `(key,value)` item; a `map` object is returned.
    - `wise='value'`: only values are transformed; a generator of `(key,new_value)` is returned.
    - any other `wise`: only keys are transformed; a generator of `(new_key,value)` is returned.

    `gen` is accepted for signature compatibility but is not used here.
    """
    if callable(f):         transform=bind(f,*args,**kwargs)
    elif isinstance(f,str): transform=f.format
    else:                   transform=f.__getitem__

    if wise is None: return map(transform,d.items())
    if wise=='value': return ((key,transform(val)) for key,val in d.items())
    return ((transform(key),val) for key,val in d.items())

Check that general mapping for dicts works nicely...

In [None]:
# With `wise` left unset the function receives each `(key,value)` item and returns a new pair.
test_dict={'a':1,'b':2,'c':3}
test_eq(dict(map_dict_ex(test_dict,lambda t:(t[0]+'_new',t[1]+1))),{'a_new':2,'b_new':3,'c_new':4})

Check that key and value wise mapping works correctly...

In [None]:
# `wise='key'` transforms only the keys; `wise='value'` transforms only the values.
test_eq(dict(map_dict_ex(test_dict,lambda k:k+'_new',wise='key')),{'a_new':1,'b_new':2,'c_new':3})
test_eq(dict(map_dict_ex(test_dict,lambda v:v+1,wise='value')),{'a':2,'b':3,'c':4})

In [None]:
# export
# %-style template with three `%s` slots. NOTE(review): not referenced by the code visible in this section — confirm it is still needed.
_error_msg='Found idxs: %s have values more than %s e.g.: %s'

class D(dict):
    "Improved version of `dict` with array handling abilities"
    def __init__(self,*args,mapping=False,**kwargs):
        "Build a regular `dict` from `args`/`kwargs` and remember the `mapping` flag."
        self.mapping=mapping
        super().__init__(*args,**kwargs)

    def eq_k(self,o:'D',with_diff=False):
        "Whether `o` has the same key set as `self`; with `with_diff` also return the keys present in only one of them."
        theirs,ours=set(o.keys()),set(self.keys())
        same=theirs==ours
        if not with_diff: return same
        return same,theirs.symmetric_difference(ours)

    def _new(self,*args,**kwargs):
        "Instantiate another object of the same concrete type as `self`."
        return type(self)(*args,**kwargs)

    def map(self,f,*args,gen=False,**kwargs):
        "Apply `f` through `map_dict_ex`; wrap the result in a new instance (`mapping=True`) or, with `gen=True`, return the lazy iterable."
        # `gen` indexes the pair: False -> build a new instance, True -> pass the iterable through `noop`
        wrap=(self._new,noop)[gen]
        return wrap(map_dict_ex(self,f,*args,**kwargs),mapping=True)

    def mapk(self,f,*args,gen=False,wise='key',**kwargs):
        "`map` that defaults to transforming keys only."
        return self.map(f,*args,wise=wise,gen=gen,**kwargs)

    def mapv(self,f,*args,gen=False,wise='value',**kwargs):
        "`map` that defaults to transforming values only."
        return self.map(f,*args,wise=wise,gen=gen,**kwargs)

In [None]:
test_dict=D({'a':1,'b':2,'c':3})
# `map` builds a new `D` by default; with `gen=True` it hands back the lazy `map` object instead.
test_eq(test_dict.map(lambda t:(t[0]+'_new',t[1]+1)),{'a_new':2,'b_new':3,'c_new':4})
test_eq(isinstance(test_dict.map(lambda t:(t[0]+'_new',t[1]+1),gen=True),map),True)
test_eq(dict(test_dict.map(lambda t:(t[0]+'_new',t[1]+1),gen=True)),{'a_new':2,'b_new':3,'c_new':4})

# `mapk` transforms keys only
test_eq(test_dict.mapk(lambda k:k+'_new'),{'a_new':1,'b_new':2,'c_new':3})
test_eq(dict(test_dict.mapk(lambda k:k+'_new',gen=True)),{'a_new':1,'b_new':2,'c_new':3})

# `mapv` transforms values only (`wise='value'` is already its default)
test_eq(test_dict.mapv(lambda v:v+1,wise='value'),{'a':2,'b':3,'c':4})
test_eq(dict(test_dict.mapv(lambda v:v+1,gen=True,wise='value')),{'a':2,'b':3,'c':4})

`BD` is the primary data structure that `fastrl` uses. It makes it easy to iterate over, and operate on, steps obtained from environments.

## BD 
> A batch-wise dictionary that requires all values to be numpy arrays, tensors, or None.

### Utilities

#### Stacking API
> We use the fastcore `typedispatch` decorators to select the correct method of stacking arrays.

In [None]:
# export
@typedispatch
def stack(a:Tensor,b:Tensor):
    "Stack two tensors row-wise with `torch.vstack`."
    return torch.vstack((a,b))
@typedispatch
def stack(a:np.ndarray,b:np.ndarray):
    "Stack two numpy arrays row-wise with `np.vstack`."
    # `np.array` is a factory function, not a type, so `np.ndarray` is the only meaningful dispatch target.
    return np.vstack((a,b))

#### Exceptions 

In [None]:
# export 
class BDTypeError(Exception):
    "Raised when a value stored in the dictionary is not of an accepted type."
    def __init__(self,k,v):
        self.message=f'Key:{k} has value {type(v)} when it should have type Tensor, or None'
        super().__init__(self.message)

    @classmethod
    def check(cls,d:dict,strict=False):
        """Validate the keys and value types of `d`.

        Raises `TypeError` for any `int` key. Raises `BDTypeError` for a value that is
        not a `Tensor`/`None` or, when `strict` is false, also not a `list`, `L` or
        `np.ndarray`.
        """
        for k,v in d.items():
            if isinstance(k,int): raise TypeError(f'{k} cannot be an int since ints can be used to index.')
            allowed=(Tensor,NoneType) if strict else (Tensor,NoneType,list,L,np.ndarray)
            if isinstance(v,allowed): continue
            raise cls(k,v)
                
class BDBatchSizeError(Exception):
    "Raised when a value's leading dimension differs from the expected batch size."
    def __init__(self,k,v,bs):
        # `v` is the offending value's shape (that is what `check` passes in)
        self.message=f'Key:{k} has shape {v} when it should have shape[0]=={bs}'
        super().__init__(self.message)

    @classmethod
    def check(cls,d:'BD'):
        """Raise `BDBatchSizeError` for the first value whose `shape[0]` differs from `d.bd_batch_size`.

        `None` values are skipped: `BDTypeError.check` accepts them, but they have no
        shape to compare.
        """
        for k,v in d.items():
            if v is None: continue
            if v.shape[0]!=d.bd_batch_size: raise cls(k,v.shape,d.bd_batch_size)
                
class BDKeyMisMatchError(Exception):
    "Raised when two dictionaries do not share exactly the same keys."
    def __init__(self,different):
        self.message=f'Keys: {different} are not shared by the dictionaries'
        super().__init__(self.message)

    @classmethod
    def check(cls,d:'BD',other:'BD'):
        "Raise `BDKeyMisMatchError` (carrying the differing keys) unless `d.eq_k(other)` reports identical key sets."
        same,diff=d.eq_k(other,with_diff=True)
        if same: return
        raise cls(diff)

#### Correction Functions

In [None]:
# export
def list2tensor(l:(L,list,np.ndarray)):
    "Pass tensors through unchanged; convert anything else with `tensor`."
    if isinstance(l,Tensor): return l
    return tensor(l)
@typedispatch
def batch_dim(o:Tensor,bs=1):
    "Prepend a batch axis to `o` when it has fewer than 2 dims or its first dim exceeds `bs`; otherwise return `o` as is."
    needs_batch_axis=len(o.shape)<2 or o.shape[0]>bs
    return o.unsqueeze(0) if needs_batch_axis else o

In [None]:
# export
def tensor2shape(k,t:Tensor,bs):
    """Summarise `t` as a single `{k: column}` entry for tabular display.

    A 2-d tensor with exactly one column is flattened to a 1-d numpy array of its
    values; any other tensor is represented by its shape string repeated `bs` times.
    """
    if len(t.shape)==2 and t.shape[1]==1:
        # `detach().cpu()` keeps this working for tensors that track gradients or live
        # off-CPU, where a bare `.numpy()` raises.
        return {k:t.detach().cpu().numpy().reshape(-1,)}
    return {k:[str(t.shape)]*bs}

def tensor2mu(k,t:Tensor,bs):
    "Return `{'<k>_mu': means}`: `t` is reshaped to `bs` rows and each row is averaged in double precision."
    per_row=t.reshape(bs,-1).double()
    return {f'{k}_mu':per_row.mean(axis=1)}

class BD(D):
    "A batch-wise `D` whose values are tensors (or `None`) sharing a leading batch dimension."
    def __init__(self,*args,bd_batch_size=1,**kwargs):
        "Converts listy values to tensors. Adds batch dims and validates them."
        # Copying an existing `BD`: ignore the requested size and infer it from the values instead.
        if args and isinstance(args[0],type(self)):bd_batch_size=None
        super().__init__(*args,**kwargs)
        # Only infer the size when none was given. The values may still be plain lists at this
        # point, and those have no `.shape`, so `bs()` must not be evaluated eagerly.
        self.bd_batch_size=self.bs() if bd_batch_size is None else bd_batch_size
        BDTypeError.check(self)
        if not self.mapping:
            self.update(self.mapv(list2tensor).mapv(batch_dim,bs=self.bd_batch_size))
            self.mapping=False
        BDTypeError.check(self,strict=True)
        if not self.mapping: BDBatchSizeError.check(self)

    def bs(self):
        "Return the batch size of `BD` assuming all values have same batch size."
        # Look at the most recently inserted value first (as before), without copying the
        # whole dict; `None` values carry no shape, so they are skipped.
        for v in reversed(list(self.values())):
            if v is not None: return v.shape[0]
        return 0

    def __getitem__(self,o):
        "Look up a key, or index every value along the batch dimension and return a new `BD`."
        if isinstance(o,(int,Tensor,np.ndarray)) or is_listy(o):
            # An int drops the batch dimension (restored with size 1); any other indexer keeps
            # it, so the new batch size is inferred from the indexed values.
            bs=1 if isinstance(o,int) else None
            return BD({k:self[k][o] for k in self},bd_batch_size=bs)
        return super().__getitem__(o)

    def __add__(self,o):
        "Stack the values of two `BD`s that share the same keys; batch sizes add up."
        BDKeyMisMatchError.check(self,o)
        return BD({k:stack(self[k],o[k]) for k in self},
                  bd_batch_size=self.bd_batch_size+o.bd_batch_size)

    @delegates(pd.DataFrame)
    def pandas(self,mu=False,**kwargs):
        "Turns a `BD` into a pandas Dataframe optionally showing `mu` of values."
        bs=self.bs()
        return pd.DataFrame(merge(
            *tuple(tensor2shape(k,v,bs) for k,v in self.items()),
            *(tuple(tensor2mu(k,v,bs) for k,v in self.items()) if mu else ())
        ),**kwargs)
        

We need to change any indexer that is passed. We don't know if the indexer is going to
be a numpy array, slice, tensor, or int.
All we know are two things:
- If it is an int, the batch dim will disappear
- If it is an indexer, then the batch dim will stay, but be smaller

In [None]:
# Scalar (non-listy) entries such as a bare int, float or bool are not accepted, so this must fail.
step_f=lambda: {'state':np.random.rand(4,),'next_state':torch.rand(4,),
                'action':np.random.randint(0,2),'reward':np.random.ranf(),
                'steps':np.random.randint(0,20),'episode_reward':np.random.randint(5,40),
                'env':np.random.randint(5,40),'done':np.random.randint(0,2)==0}
test_fail(lambda:BD(step_f()))
# Same step, but with the scalars wrapped in lists and an extra 3-d 'image' tensor.
step_f=lambda: {'state':np.random.rand(4,),'next_state':torch.rand(4,),
                'action':[np.random.randint(0,2)],'reward':[np.random.ranf()],
                'steps':[np.random.randint(0,20)],'episode_reward':[np.random.randint(5,40)],
                'env':[np.random.randint(5,40)],'done':[np.random.randint(0,2)==0],
                'image':torch.rand(5,5,3)}

# Adding 19 single-step `BD`s onto an initial one yields a batch of 20.
test_d=sum((BD(step_f()) for _ in range(19)),BD(step_f()))
test_eq(test_d.bd_batch_size,20)

Check that indexing works as well as the adjusting of the batch size...

In [None]:
# A string key returns the stored tensor, an int returns a `BD`,
# and a slice returns a `BD` whose batch size matches the slice length.
test_eq(type(test_d['state']),Tensor)
test_eq(type(test_d[5]),BD)
test_eq(test_d[5:8].bd_batch_size,3)

In [None]:
# Values of shape (bs,1) are shown as-is; every other column shows its tensor shape as a string.
test_d.pandas()

Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,image
0,"torch.Size([20, 4])","torch.Size([20, 4])",1,0.184751,11,23,31,False,"torch.Size([20, 5, 5, 3])"
1,"torch.Size([20, 4])","torch.Size([20, 4])",0,0.047794,17,38,5,False,"torch.Size([20, 5, 5, 3])"
2,"torch.Size([20, 4])","torch.Size([20, 4])",0,0.651425,15,19,25,True,"torch.Size([20, 5, 5, 3])"
3,"torch.Size([20, 4])","torch.Size([20, 4])",0,0.212059,8,18,22,False,"torch.Size([20, 5, 5, 3])"
4,"torch.Size([20, 4])","torch.Size([20, 4])",0,0.195875,9,20,14,True,"torch.Size([20, 5, 5, 3])"
5,"torch.Size([20, 4])","torch.Size([20, 4])",0,0.109322,8,11,13,True,"torch.Size([20, 5, 5, 3])"
6,"torch.Size([20, 4])","torch.Size([20, 4])",0,0.676784,10,5,16,True,"torch.Size([20, 5, 5, 3])"
7,"torch.Size([20, 4])","torch.Size([20, 4])",0,0.116126,18,34,10,True,"torch.Size([20, 5, 5, 3])"
8,"torch.Size([20, 4])","torch.Size([20, 4])",0,0.729368,19,10,38,False,"torch.Size([20, 5, 5, 3])"
9,"torch.Size([20, 4])","torch.Size([20, 4])",1,0.237299,11,22,8,False,"torch.Size([20, 5, 5, 3])"


In [None]:
# Wrapping an existing `BD` infers the batch size from its values, leaving the shapes untouched.
test_eq(BD(test_d)['state'].shape,(20,4))

In [None]:
# hide
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.export import *
    from nbdev.export2html import *
    from nbdev.cli import make_readme
    # Regenerate the README, then convert the notebooks (see the "Converted ..." lines in the output)
    make_readme()
    notebook2script()
    notebook2html()

converting /home/fastrl_user/fastrl/nbs/index.ipynb to README.md
Converted .data.block_old.ipynb.
Converted 00_core.ipynb.
Converted 00_nbdev_extension.ipynb.
Converted 05_data.block.ipynb.
Converted 05_data.test_async.ipynb.
Converted 20_test_utils.ipynb.
Converted index.ipynb.
Converted nbdev_template.ipynb.
converting: /home/fastrl_user/fastrl/nbs/05_data.block.ipynb
