In [136]:
# default_exp metrics
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [137]:
# hide
from nbdev.showdoc import *
from nbdev.imports import *
from nbdev.export2html import *
# Environment sanity check. It is skipped whenever the IN_TEST environment variable is set to
# a non-empty value; otherwise each star-imported flag is asserted on its own line so that a
# failure points at the exact condition.
if not os.environ.get("IN_TEST"):
    assert IN_NOTEBOOK
    assert not IN_COLAB
    assert IN_IPYTHON

# Metrics

> Metrics for reinforcement learning

In [138]:
# export
from fastai.callback import *
from fastai.basic_train import *
from fastai.core import *
from fastai.torch_core import *
from dataclasses import dataclass
import torch.multiprocessing as mp
import logging

# Root logging layout: timestamp, process id, source line and level come before the message.
logging.basicConfig(
    datefmt='%m-%d %H:%M:%S',
    format='[%(asctime)s] p%(process)s line:%(lineno)d %(levelname)s - %(message)s',
)
# Only CRITICAL records of the data-block logger are let through.
logging.getLogger('fastrl.data_block').setLevel('CRITICAL')
# Logger used by the code in this module.
_logger=logging.getLogger(__name__)

In [139]:
# hide
from fastrl.data_block import *
from fastrl.basic_agents import *
from fastrl.basic_train import *
from fastai.basic_data import *
import sys

# Let INFO-level records of this notebook's own logger through for the cells that follow.
_logger.setLevel('INFO')

In [158]:
# export
@dataclass
class TotalRewards:
    """Message carrying episode reward totals through a dataset's `metric_queue`.

    `_grad_fitter` wraps the non-empty result of `ds.pop_total_rewards()` in this class before
    putting it on the queue, so `rewards` holds a sized collection of totals rather than one
    number. No validation is performed; a bare scalar is still accepted.
    """
    rewards:list # reward totals as returned by `pop_total_rewards()`

class RewardMetric(LearnerCallback):
    """Report the mean total reward of each dataset as an epoch metric.

    Registers `train_reward` (plus `valid_reward` when the recorder validates and the data has
    a validation set). Rewards are read from the dataset's `metric_queue` when the dataset has
    that attribute, and from `ds.pop_total_rewards()` otherwise.
    """
    _order=-20

    def on_train_begin(self, **kwargs):
        "Register the metric names and create every `metric_queue` that is still `None`."
        has_valid=not (self.learn.recorder.no_val or self.learn.data.empty_val)
        self.learn.recorder.add_metric_names(['train_reward','valid_reward'] if has_valid else ['train_reward'])
        # A `None` placeholder for the validation set fails the `hasattr` test and is skipped.
        for ds in [self.learn.data.train_ds,None if self.learn.data.empty_val else self.learn.data.valid_ds]:
            if hasattr(ds,'metric_queue') and ds.metric_queue is None:
                ds.metric_queue=mp.JoinableQueue(ds.queue_sz*len(ds)) # Make sure this queue has more space to prevent locking

    def on_epoch_end(self,last_metrics,**kwargs: Any):
        "Drain the pending rewards of every dataset and append one mean per dataset to `last_metrics`."
        rewards=[]
        for ds in [self.learn.data.train_ds,None if self.learn.data.empty_val else self.learn.data.valid_ds]:
            if ds is None:continue
            if hasattr(ds,'metric_queue'):
                rs=[]
                if ds.metric_queue is not None:
                    while not ds.metric_queue.empty():
                        o=ds.metric_queue.get()
                        # `None` is the end-of-run sentinel. Each message may hold one total or a
                        # whole collection of them, so flatten: collections of different lengths
                        # can then be averaged together.
                        if o is not None:rs.extend(np.ravel(o.rewards))
            else:rs=ds.pop_total_rewards()
            # Nothing finished this epoch: report NaN directly instead of triggering numpy's
            # "Mean of empty slice" RuntimeWarning.
            rewards.append(np.mean(rs) if len(rs)!=0 else np.nan)
        return add_metrics(last_metrics,rewards)

In [141]:
def _grad_fitter(model:Optional[nn.Module],learner_cls:Optional['AgentLearner'],agent:Optional['BaseAgent'],ds_cls:'ExperienceSourceDataset',
            pause_event:mp.Event,cancel_event:mp.Event,main_queue:Optional[mp.JoinableQueue],metric_queue:Optional[mp.JoinableQueue]):
    """Default fitter for AsyncGradExperienceSourceDataset.

    Repeatedly iterates a freshly built dataset until `cancel_event` is set.

    - `model`, `agent`: forwarded to `learner_cls` when a learner is built.
    - `learner_cls`: if not `None`, a learner is created over the dataset and stored in `ds.learn`.
    - `ds_cls`: zero-argument callable returning the dataset to iterate.
    - `pause_event`: while set, the loop idles (until it is cleared or the run is cancelled).
    - `cancel_event`: stops the outer loop once set; always set by this function on exit.
    - `main_queue`: if not `None`, receives `(xb,{'loss':0.5,**yb})` per item and a final `None`.
    - `metric_queue`: if not `None`, receives a `TotalRewards` after each pass that finished at
      least one episode, and a final `None`.
    """
    def _wait_while_paused():
        "Idle in 0.1s slices while paused; returns at once when resumed or cancelled."
        while pause_event.is_set() and not cancel_event.is_set():cancel_event.wait(0.1)

    ds=ds_cls()
    if learner_cls is not None:
        # The loader only exists to give the learner a `DataBunch`; iteration below uses `ds` directly.
        dl=DataLoader(ds,batch_size=1,num_workers=0)
        learn=learner_cls(data=DataBunch(dl,dl),model=model,agent=agent)
        ds.learn=learn
    try:
        while not cancel_event.is_set():
            for xb,yb in ds:
                _wait_while_paused()
                if main_queue is not None:
                    loss=0.5 # Place holder
                    main_queue.put((xb,{'loss':loss,**yb}))
            if metric_queue is not None:
                total_rewards=ds.pop_total_rewards()
                if len(total_rewards)!=0:
                    if metric_queue.full():_logger.warning('Metric queue is full. Increase its size,empty it, or set metric_queue to None.')
                    metric_queue.put(TotalRewards(total_rewards))
            # Same cancellable wait as above, instead of a busy spin that ignores cancellation.
            _wait_while_paused()
    finally:
        # End-of-stream sentinels; either queue may legitimately be `None`.
        if main_queue is not None:main_queue.put(None)
        if metric_queue is not None:metric_queue.put(None)
        cancel_event.set()
        sys.stdout.flush()

In [142]:
# CartPole-v1 through the async data bunch with `_grad_fitter`, reporting `RewardMetric`.
data=AsyncExperienceSourceDataBunch.from_env(
    'CartPole-v1',
    use_grad_experience=True,firstlast=True,add_valid=False,
    n_processes=4,fitter_fn=_grad_fitter,
    bs=64,ds_kwargs={'n_envs':15},
)
# Small MLP: 4 inputs -> 5 hidden units -> 2 outputs.
model=nn.Sequential(
    nn.Linear(4,5),
    nn.ReLU(),
    nn.Linear(5,2),
)
agent=DQNAgent(model=model)
learn=AgentLearner(data,model,agent=agent,callback_fns=[FakeRunCallback,RewardMetric])
learn.fitter=_grad_fitter
learn.fit(10,lr=0.01,wd=1)

epoch,train_loss,valid_loss,train_reward,time
0,0.5,#na#,10.75,00:01
1,0.5,#na#,10.375,00:01
2,0.5,#na#,9.4,00:01
3,0.5,#na#,9.571429,00:01
4,0.5,#na#,9.75,00:01
5,0.5,#na#,9.6,00:01
6,0.5,#na#,9.857143,00:01
7,0.5,#na#,9.25,00:01
8,0.5,#na#,9.5,00:01
9,0.5,#na#,9.333333,00:01


In [143]:
# CartPole-v1 through the plain `ExperienceSourceDataBunch` with default options, reporting `RewardMetric`.
data=ExperienceSourceDataBunch.from_env('CartPole-v1')
# Small MLP: 4 inputs -> 5 hidden units -> 2 outputs.
model=nn.Sequential(
    nn.Linear(4,5),
    nn.ReLU(),
    nn.Linear(5,2),
)
agent=DQNAgent(model=model)
learn=AgentLearner(
    data,model,
    agent=agent,
    callback_fns=[FakeRunCallback,RewardMetric],
)
learn.fit(10,lr=0.01,wd=1)

epoch,train_loss,valid_loss,train_reward,valid_reward,time
0,0.5,#na#,18.0,,00:00
1,0.5,#na#,11.0,,00:00
2,0.5,#na#,12.0,,00:00
3,0.5,#na#,9.0,,00:00
4,0.5,#na#,9.0,,00:00
5,0.5,#na#,10.0,,00:00
6,0.5,#na#,10.0,,00:00
7,0.5,#na#,9.0,,00:00
8,0.5,#na#,8.0,,00:00
9,0.5,#na#,9.0,,00:00


In [153]:
# export 
class NGamesMetric(LearnerCallback):
    """Report the running number of finished games as an epoch metric.

    A game is counted for every non-zero entry of the `'d'` field of a batch target. Counts
    are cumulative over the whole fit; nothing is reset between epochs. When only
    `train_n_games` is registered, the metric is the total of all batches seen. When
    `valid_n_games` is registered too, batches flagged `train=False` are reported under it and
    the rest under `train_n_games`, so one value is emitted per registered name.
    """
    _order=-20
    def __init__(self,*args,**kwargs):
        super(NGamesMetric,self).__init__(*args,**kwargs)
        self.n_games=0       # every game seen so far (training and validation batches)
        self.valid_n_games=0 # the share of `n_games` seen in batches flagged `train=False`
        self._has_valid=False

    def on_train_begin(self,**kwargs):
        "Register one metric name per value that `on_epoch_end` will emit."
        self._has_valid=not (self.learn.recorder.no_val or self.learn.data.empty_val)
        metric_names = ['train_n_games', 'valid_n_games'] if self._has_valid else ['train_n_games']
        self.learn.recorder.add_metric_names(metric_names)

    def on_batch_begin(self,last_target,train=True,**kwargs):
        "Add the finished games of this batch; `last_target` is one target or a list of them."
        targets=last_target if isinstance(last_target,list) else [last_target]
        # `.item()` also works for tensors that live on another device or require grad.
        n_done=sum(int(o['d'].sum().item()) for o in targets)
        self.n_games+=n_done
        if not train:self.valid_n_games+=n_done

    def on_epoch_end(self,last_metrics,**kwargs: Any):
        "Append the cumulative count(s) to `last_metrics`."
        if self._has_valid:
            n_valid=int(self.valid_n_games)
            return add_metrics(last_metrics,[int(self.n_games)-n_valid,n_valid])
        return add_metrics(last_metrics,int(self.n_games))

In [151]:
# CartPole-v1 through the async data bunch with `_grad_fitter`, reporting `NGamesMetric`.
data=AsyncExperienceSourceDataBunch.from_env(
    'CartPole-v1',
    use_grad_experience=True,firstlast=True,add_valid=False,
    n_processes=4,fitter_fn=_grad_fitter,
    bs=64,ds_kwargs={'n_envs':15},
)
# Small MLP: 4 inputs -> 5 hidden units -> 2 outputs.
model=nn.Sequential(
    nn.Linear(4,5),
    nn.ReLU(),
    nn.Linear(5,2),
)
agent=DQNAgent(model=model)
learn=AgentLearner(data,model,agent=agent,callback_fns=[FakeRunCallback,NGamesMetric])
learn.fitter=_grad_fitter
learn.fit(10,lr=0.01,wd=1)

epoch,train_loss,valid_loss,train_n_games,time
0,0.5,#na#,4,00:01
1,0.5,#na#,8,00:01
2,0.5,#na#,12,00:01
3,0.5,#na#,16,00:01
4,0.5,#na#,22,00:01
5,0.5,#na#,28,00:01
6,0.5,#na#,32,00:01
7,0.5,#na#,38,00:01
8,0.5,#na#,41,00:01
9,0.5,#na#,45,00:01


In [154]:
# CartPole-v1 through the async data bunch with its default fitter settings, reporting `NGamesMetric`.
data=AsyncExperienceSourceDataBunch.from_env(
    'CartPole-v1',
    firstlast=True,add_valid=False,
    n_processes=4,
    bs=64,ds_kwargs={'n_envs':15},
)
# Small MLP: 4 inputs -> 5 hidden units -> 2 outputs.
model=nn.Sequential(
    nn.Linear(4,5),
    nn.ReLU(),
    nn.Linear(5,2),
)
agent=DQNAgent(model=model)
learn=AgentLearner(
    data,model,
    agent=agent,
    callback_fns=[FakeRunCallback,NGamesMetric],
)
learn.fit(10,lr=0.01,wd=1)

epoch,train_loss,valid_loss,train_n_games,time
0,0.5,#na#,5,00:01
1,0.5,#na#,13,00:01
2,0.5,#na#,19,00:01
3,0.5,#na#,27,00:01
4,0.5,#na#,33,00:01
5,0.5,#na#,39,00:01
6,0.5,#na#,47,00:01
7,0.5,#na#,55,00:01
8,0.5,#na#,60,00:01
9,0.5,#na#,67,00:01


In [159]:
# hide
from nbdev.export import *
# Run the nbdev notebook-to-script export; the cell output lists every notebook it processed
# as "Converted <name>.ipynb.".
notebook2script()
# Render the notebooks to HTML (the "converting: <path>" lines in the output).
# NOTE(review): n_workers=0 presumably keeps the conversion in this process — confirm against nbdev.
notebook2html(n_workers=0)

Converted 00_core.ipynb.
Converted 01_wrappers.ipynb.
Converted 02_callbacks.ipynb.
Converted 03_basic_agents.ipynb.
Converted 04_metrics.ipynb.
Converted 05_data_block.ipynb.
Converted 06_basic_train.ipynb.
Converted 12_a3c.a3c_data.ipynb.
Converted index.ipynb.
Converted notes.ipynb.


converting: /opt/project/fastrl/nbs/12_a3c.a3c_data.ipynb
converting: /opt/project/fastrl/nbs/03_basic_agents.ipynb
converting: /opt/project/fastrl/nbs/04_metrics.ipynb
