In [None]:
#hide
#skip
# The install only runs when `/content` exists (presumably the Colab filesystem — confirm);
# on any other machine the `[ -e /content ]` test fails and pip is never invoked.
! [ -e /content ] && pip install -Uqq fastrl['dev']  # upgrade fastrl on colab

In [None]:
# hide
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.showdoc import *
    from nbdev.imports import *
    # NOTE(review): `os` and the IN_* flags are not imported explicitly in this cell;
    # presumably they arrive through the star imports above — confirm.
    # The environment sanity checks are skipped when the IN_TEST env variable is set.
    if not os.environ.get("IN_TEST", None):
        assert IN_NOTEBOOK
        assert not IN_COLAB
        assert IN_IPYTHON

In [None]:
# export
# Python native modules
import os,warnings
# Third party libs
from fastcore.all import *
from fastai.torch_core import *
from fastai.basics import *
import pandas as pd
# Local modules

In [None]:
# hide
import torch
import numpy as np

# Core
> Core libs for fastrl

# Python Extensions

In [None]:
# export
def isnone(v):
    "Tell whether `v` is the `None` singleton (falsy values such as `0` or `''` do not count)"
    return v is None

In [None]:
# export
def map_dict_ex(d,f,*args,gen=False,wise=None,**kwargs):
    """Map `f` over the dict `d`, returning a lazy iterable.

    `f` may be a callable (bound to `args`/`kwargs` with `bind`), a format string
    (its `format` method is applied) or an indexable object (its `__getitem__` is applied).

    - `wise=None`: the mapper receives every `(key,value)` pair and a `map` object is returned.
    - `wise='value'`: the mapper receives the values, a generator of `(key,new_value)` is returned.
    - any other `wise`: the mapper receives the keys, a generator of `(new_key,value)` is returned.

    `gen` is accepted but not used inside this function.
    """
    if callable(f):          mapper = bind(f,*args,**kwargs)
    elif isinstance(f,str):  mapper = f.format
    else:                    mapper = f.__getitem__

    pairs = d.items()
    if wise is None:    return map(mapper,pairs)
    if wise=='value':   return ((key,mapper(val)) for key,val in pairs)
    return ((mapper(key),val) for key,val in pairs)

Check that general mapping for dicts works nicely...

In [None]:
# With `wise` left as None the mapped function receives each `(key,value)` tuple
test_dict={'a':1,'b':2,'c':3}
test_eq(dict(map_dict_ex(test_dict,lambda t:(t[0]+'_new',t[1]+1))),{'a_new':2,'b_new':3,'c_new':4})

Check that key and value wise mapping works correctly...

In [None]:
# `wise='key'` transforms only the keys, `wise='value'` only the values
test_eq(dict(map_dict_ex(test_dict,lambda k:k+'_new',wise='key')),{'a_new':1,'b_new':2,'c_new':3})
test_eq(dict(map_dict_ex(test_dict,lambda v:v+1,wise='value')),{'a':2,'b':3,'c':4})

In [None]:
# export
def batch_sz(arr):
    """Size of the leading (batch) dimension of `arr`.

    Returns `shape[0]` for numpy arrays and tensors, `len` for `list`/`L`, and `None`
    for anything that has no batch dimension (including 0-d arrays and tensors).
    """
    if isinstance(arr,(np.ndarray,Tensor)):
        # A 0-d array/tensor is a scalar: `shape[0]` would raise `IndexError`, so report
        # it as not batchable, the same answer other scalars get.
        return arr.shape[0] if len(arr.shape)>0 else None
    if isinstance(arr,(list,L)): return len(arr)
    return None

# `stack(a,b)` joins two values along the batch axis. The overload is picked by
# `typedispatch` from the annotated argument types.

# Fallback for un-annotated types: both sides are wrapped in `L` and concatenated
@typedispatch
def stack(a,b): return L(a)+L(b)
# `a` is already an `L`: only `b` needs wrapping
@typedispatch
def stack(a:L,b): return a+L(b)
# `b` is already an `L`: only `a` needs wrapping
@typedispatch
def stack(a,b:L): return L(a)+b
# Both are `L`: plain concatenation
@typedispatch
def stack(a:L,b:L): return a+b
# Tensors are stacked row-wise with `torch.vstack`
@typedispatch
def stack(a:Tensor,b:Tensor): return torch.vstack((a,b))
# Numpy arrays are stacked row-wise with `np.vstack`
# NOTE(review): `np.array` is a factory function, not a type; only `np.ndarray` looks
# meaningful in this annotation — confirm how `typedispatch` treats it.
@typedispatch
def stack(a:(np.array,np.ndarray),b:(np.array,np.ndarray)): return np.vstack((a,b))

class UnCollatable(Exception):
    """Raised when a collection of values cannot be collated into one batch.

    `data` is the offending object (shown in the message), `reasons` is an optional
    comma separated string of reason codes (`'nones'`, `'mismatch'`) and `msg` is an
    optional prefix for the final message. After construction `self.reasons` is a list.
    """
    def __init__(self,data,reasons:str=None,msg=''):
        self.data,self.msg=data,msg
        # `reasons` defaults to None, which must not crash on `.split`; codes are matched
        # exactly against the parsed list instead of by substring on the raw string.
        self.reasons=[r.strip() for r in (reasons or '').split(',') if r.strip()]
        if 'nones' in self.reasons: self.msg+=f'Some values are not listy: {self.data}'
        if 'mismatch' in self.reasons: self.msg+=f'Some bs do not match {self.data}'

    def __str__(self): return self.msg
   

In [None]:
# export
_error_msg='Found idxs: %s have values more than %s e.g.: %s'

def add_batch(a,indexes):
    "Prepend an axis of size 1 to numpy array `a` when `indexes` is not a `list`/`L`; otherwise return `a` as is"
    indexes_are_listy = isinstance(indexes,(list,L))
    if not indexes_are_listy and isinstance(a,(np.ndarray,)): return np.expand_dims(a,0)
    return a

class D(dict):
    "Improved version of `dict` with array handling abilities"
    def __init__(self,*args,**kwargs):
        # A single non-empty list/tuple/`L` holding only dicts is merged key by key
        # (through `__add__`). Every other input is handed to `dict` untouched, so
        # `D([('a',1),('b',2)])` and `D([])` behave like the builtin.
        if len(args)==1 and isinstance(args[0],(tuple,list,L)):
            records=args[0]
            if len(records)>0 and all(isinstance(v,dict) for v in records):
                records=L(records).map(D)
                args=(sum(records[1:],records[0]),)
        super().__init__(*args,**kwargs)

    def __add__(self,o:'D')->'D':
        "Return a copy of `self` whose values are `stack`ed with the values of `o`; keys must match"
        if not self.eq_k(o): raise ValueError(f'Key Mismatch: self:{self.keys()} o:{o.keys()}')
        d=deepcopy(self)
        for k in self: d[k]=stack(d[k],o[k])
        return d

    def eq_k(self,o:'D'):
        "Whether `self` and `o` have the same set of keys"
        return set(o.keys())==set(self.keys())
    def eq_types(self,o:'D'):
        "Whether `self` and `o` hold the same set of value types"
        return set(map(type,o.values()))==set(map(type,self.values()))
    def _new(self,*args,**kwargs): return type(self)(*args,**kwargs)
    def argwhere(self,k,f,*args,**kwargs):
        "Apply `f` to the value stored under `k`"
        return f(self[k],*args,**kwargs)
    def filter(self,k=None,f=None,*args,indexes=None,**kwargs):
        "Keep the rows given by `indexes`, or by `f(self[k],*args,**kwargs)` when `indexes` is None"
        if indexes is None: indexes=f(self[k],*args,**kwargs)
        bs=self.bs()
        # `default=None` keeps an empty selection from crashing `max`
        largest=max(indexes,default=None)
        if largest is not None and largest>=bs: raise IndexError(_error_msg%(indexes,bs,largest))
        return self.subset(indexes)

    def subset(self,indexes):
        "New instance where every value is indexed by `indexes`"
        return type(self)({k:add_batch(self[k][indexes],indexes) for k in self})

    def map(self,f,*args,gen=False,**kwargs):
        "Apply `map_dict_ex`; returns the lazy iterable when `gen` is True, otherwise a new instance"
        return (self._new,noop)[gen](map_dict_ex(self,f,*args,**kwargs))
    def mapk(self,f,*args,gen=False,wise='key',**kwargs):
        "`map` over the keys"
        return self.map(f,*args,gen=gen,wise=wise,**kwargs)
    def mapv(self,f,*args,gen=False,wise='value',**kwargs):
        "`map` over the values"
        return self.map(f,*args,gen=gen,wise=wise,**kwargs)

    def bs(self,validate=True):
        "Batch size shared by the values; with `validate` raises `UnCollatable` if a value has none or sizes differ"
        bs_map=self.mapv(batch_sz)
        if validate:
            # `batch_sz` answers None for values without a batch dimension, so the check
            # has to look at the sizes, not at the raw values
            if any(bs_map.mapv(isnone).values()):
                raise UnCollatable(bs_map,'nones')
            if max(bs_map.values())!=min(bs_map.values()):
                raise UnCollatable(bs_map,'mismatch')
        return max(bs_map.values())

    @delegates(pd.DataFrame)
    def pandas(self,**kwargs):
        """Build a `pd.DataFrame` with one row per batch element.

        Values that expose `mean` get an extra `<key>_mu` column with the per-row mean.
        Arrays/tensors of shape `(bs,1)` become plain lists, any other array/tensor is
        replaced by its shape repeated on every row. `kwargs` go to `pd.DataFrame`.
        """
        d=deepcopy(self)
        items=list(d.items())
        # Every column keeps the same batch size while being rewritten below, so it is
        # computed once; an empty `D` has no batch size and skips the loop entirely
        bs=d.bs() if items else None
        for k,v in items:
            if hasattr(v,'mean'):
                v=v.reshape(bs,-1)
                v=v.astype(float) if hasattr(v,'astype') else v.double()
                d[f'{k}_mu']=v.mean(axis=1)
            if isinstance(v,np.ndarray):
                if len(v.shape)==2 and v.shape[1]==1:d[k]=v.tolist()
                else:                                d[k]=[str(v.shape)]*bs
            if isinstance(v,Tensor):
                if len(v.shape)==2 and v.shape[1]==1:d[k]=v.numpy().tolist()
                else:                                d[k]=[str(v.shape)]*bs
        return pd.DataFrame(d,**kwargs)

In [None]:
# `step_f` fabricates one random environment step; the values are unseeded, so the
# rendered table changes on every run.
step_f=lambda: {'state':np.random.rand(4,),'next_state':torch.rand(4,),
                'action':np.random.randint(0,2),'reward':np.random.ranf(),
                'steps':np.random.randint(0,20),'episode_reward':np.random.randint(5,40),
                'env':np.random.randint(5,40),'done':np.random.randint(0,2)==0}

# Passing a tuple of 20 step dicts merges them key by key into a single `D`
test_d=D(tuple(step_f() for _ in range(20)))
test_d.pandas()

Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,state_mu,next_state_mu
0,"(20, 4)","torch.Size([20, 4])",0,0.19693,16,16,10,True,0.516361,0.279241
1,"(20, 4)","torch.Size([20, 4])",1,0.143398,19,6,9,True,0.434413,0.68809
2,"(20, 4)","torch.Size([20, 4])",1,0.861395,15,32,23,True,0.573047,0.514426
3,"(20, 4)","torch.Size([20, 4])",1,0.601002,4,37,11,True,0.46217,0.39983
4,"(20, 4)","torch.Size([20, 4])",0,0.390778,16,25,37,True,0.536667,0.28517
5,"(20, 4)","torch.Size([20, 4])",1,0.795684,10,19,18,False,0.505432,0.483909
6,"(20, 4)","torch.Size([20, 4])",0,0.804589,18,19,27,True,0.745842,0.583066
7,"(20, 4)","torch.Size([20, 4])",1,0.177756,12,35,19,False,0.460609,0.356298
8,"(20, 4)","torch.Size([20, 4])",1,0.628984,15,15,8,True,0.758492,0.455227
9,"(20, 4)","torch.Size([20, 4])",0,0.625511,2,21,29,False,0.646965,0.538502


In [None]:
# Keep only the steps whose `done` flag is True; `L.argwhere` is called on `test_d['done']`
# with the lambda and presumably yields the matching positions — confirm against fastcore.
test_d.filter('done',L.argwhere,lambda x:x==True).pandas()

Unnamed: 0,state,next_state,action,reward,steps,episode_reward,env,done,state_mu,next_state_mu
0,"(15, 4)","torch.Size([15, 4])",0,0.19693,16,16,10,True,0.516361,0.279241
1,"(15, 4)","torch.Size([15, 4])",1,0.143398,19,6,9,True,0.434413,0.68809
2,"(15, 4)","torch.Size([15, 4])",1,0.861395,15,32,23,True,0.573047,0.514426
3,"(15, 4)","torch.Size([15, 4])",1,0.601002,4,37,11,True,0.46217,0.39983
4,"(15, 4)","torch.Size([15, 4])",0,0.390778,16,25,37,True,0.536667,0.28517
5,"(15, 4)","torch.Size([15, 4])",0,0.804589,18,19,27,True,0.745842,0.583066
6,"(15, 4)","torch.Size([15, 4])",1,0.628984,15,15,8,True,0.758492,0.455227
7,"(15, 4)","torch.Size([15, 4])",0,0.851111,14,29,24,True,0.484955,0.519957
8,"(15, 4)","torch.Size([15, 4])",0,0.426686,11,20,29,True,0.435867,0.334742
9,"(15, 4)","torch.Size([15, 4])",0,0.353374,1,16,39,True,0.608765,0.422973


In [None]:
# `map` hands `(key,value)` tuples to the function; `gen=True` returns the lazy `map`
# object instead of a new `D`
test_dict=D({'a':1,'b':2,'c':3})
test_eq(test_dict.map(lambda t:(t[0]+'_new',t[1]+1)),{'a_new':2,'b_new':3,'c_new':4})
test_eq(isinstance(test_dict.map(lambda t:(t[0]+'_new',t[1]+1),gen=True),map),True)
test_eq(dict(test_dict.map(lambda t:(t[0]+'_new',t[1]+1),gen=True)),{'a_new':2,'b_new':3,'c_new':4})

# `mapk` transforms keys only
test_eq(test_dict.mapk(lambda k:k+'_new'),{'a_new':1,'b_new':2,'c_new':3})
test_eq(dict(test_dict.mapk(lambda k:k+'_new',gen=True)),{'a_new':1,'b_new':2,'c_new':3})

# `mapv` transforms values only (`wise='value'` is already its default)
test_eq(test_dict.mapv(lambda v:v+1,wise='value'),{'a':2,'b':3,'c':4})
test_eq(dict(test_dict.mapv(lambda v:v+1,gen=True,wise='value')),{'a':2,'b':3,'c':4})

In [None]:
# Tensor, list and numpy values all carry 20 rows, so the shared batch size is 20
D(states=torch.rand(20,4),done=[False]*20,reward=np.ones((20,1))).bs()

20

In [None]:
# The `(20,1)` reward array is listed per row, the `(20,4)` tensor is shown as its shape,
# and both get a `*_mu` mean column
D(states=torch.rand(20,4),done=[False]*20,reward=np.ones((20,1))).pandas()

Unnamed: 0,states,done,reward,states_mu,reward_mu
0,"torch.Size([20, 4])",False,[1.0],0.415381,1.0
1,"torch.Size([20, 4])",False,[1.0],0.624829,1.0
2,"torch.Size([20, 4])",False,[1.0],0.538516,1.0
3,"torch.Size([20, 4])",False,[1.0],0.514669,1.0
4,"torch.Size([20, 4])",False,[1.0],0.173463,1.0
5,"torch.Size([20, 4])",False,[1.0],0.242587,1.0
6,"torch.Size([20, 4])",False,[1.0],0.292915,1.0
7,"torch.Size([20, 4])",False,[1.0],0.437619,1.0
8,"torch.Size([20, 4])",False,[1.0],0.430351,1.0
9,"torch.Size([20, 4])",False,[1.0],0.223198,1.0


In [None]:
# hide
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.export import *
    from nbdev.export2html import *
    from nbdev.cli import make_readme
    # Regenerate the README, the exported python modules and the html docs;
    # the log captured below this cell lists the notebooks that were converted.
    make_readme()
    notebook2script()
    notebook2html()

converting /home/fastrl_user/fastrl/nbs/index.ipynb to README.md
Converted .data.block_old.ipynb.
Converted 00_core.ipynb.
Converted 00_nbdev_extension.ipynb.
Converted 05_data.block.ipynb.
Converted 05_data.test_async.ipynb.
Converted 20_test_utils.ipynb.
Converted index.ipynb.
Converted nbdev_template.ipynb.
converting: /home/fastrl_user/fastrl/nbs/05_data.block.ipynb
converting: /home/fastrl_user/fastrl/nbs/00_core.ipynb
