In [1]:
#hide
#skip
# Turn off the jedi-based completer for this IPython session.
%config Completer.use_jedi = False
# upgrade fastrl on colab
# The `[ -e /content ]` test gates the whole chain: pip/apt-get only run when the
# /content path exists (presumably only true on colab -- confirm).
# Only the apt-get output is redirected to /dev/null; pip is already quiet via -qq.
! [ -e /content ] && pip install -Uqq fastrl['dev'] pyvirtualdisplay && \
                     apt-get install -y xvfb python-opengl > /dev/null 2>&1 
# NOTE: IF YOU SEE VERSION ERRORS, IT IS SAFE TO IGNORE THEM. COLAB IS BEHIND IN SOME OF THE PACKAGE VERSIONS

In [2]:
# hide
from fastcore.imports import in_colab
# Colab still requires tornado<6, so nbdev is only imported when we are not on colab.
if in_colab():
    # A virtual display is needed for colab
    from pyvirtualdisplay import Display
    display = Display(visible=0, size=(400, 300))
    display.start()
else:
    from nbdev.showdoc import *
    from nbdev.imports import *
    # Sanity-check the runtime flags unless the IN_TEST environment variable is set.
    if not os.environ.get("IN_TEST"):
        assert IN_NOTEBOOK and not IN_COLAB and IN_IPYTHON

In [3]:
# default_exp agents.dqn.targets

In [4]:
# export
# Python native modules
import os
from collections import deque
from typing import *
# Third party libs
import torch
from torch.nn import *
from fastcore.all import *
from fastai.learner import *
from fastai.torch_basics import *
from fastai.torch_core import *
from fastai.callback.all import *
# Local modules
from fastrl.data.block_simple import *
from fastrl.data.gym import *
from fastrl.agent import *
from fastrl.core import *
from fastrl.agents.dqn.core import *

# DQN Targets + N-Step
> A bare-bones DQN is usually extremely unstable. Target models can alleviate this. This module also provides better support for First-Last N-step experience.

In [5]:
# export
class DQNTargetTrainer(Callback):
    """Training callback that computes DQN targets from a separate target model.

    The target model is a deep copy of `learn.model.model` made in `before_fit`.
    Its weights are refreshed from the live model every `target_sync` batches.

    Parameters
    ----------
    n_batch:     batch counter; it is reset to 0 in `before_fit`.
    target_sync: number of batches between target-model weight syncs.
    discount:    discount factor applied to the next-state value.
    n_steps:     exponent applied to `discount` (`discount**n_steps`); presumably
                 the number of environment steps each transition spans -- confirm
                 against the data source configuration.
    """

    def __init__(self,n_batch=0,target_sync=300,discount=0.99,n_steps=1):
        store_attr()
        # Clone of the batch dict, restored onto `learn.xb` in `before_backward`.
        self._xb=None

    def before_fit(self):
        "Create the target model as a copy of the inner model and reset the batch counter."
        self.learn.target_model=deepcopy(self.learn.model.model)
        self.n_batch=0

    def after_pred(self):
        """Replace `learn.pred` / `learn.yb` with the Q-values of taken actions and their targets.

        Reads the keys 'state', 'action', 'reward', 'done' and 'next_state' from the
        first element of `learn.xb` (assumed to be a dict of tensors -- confirm
        against the dataloader).
        """
        batch=self.xb[0]
        self.learn.xb=batch
        # Keep a cloned copy of the batch so `before_backward` can restore a tuple-shaped xb.
        self._xb=({k:v.clone() for k,v in batch.items()},)
        done_mask=batch['done'].reshape(-1,)
        self.learn.done_mask=done_mask

        # Get the target.
        # The target model is never optimised here, so skip autograd bookkeeping:
        # this avoids building a graph through it and accumulating unused gradients.
        with torch.no_grad():
            next_q=self.target_model(batch['next_state']).max(dim=1).values.reshape(-1,1)
            # Rows flagged as done get a next-state value of zero.
            next_q[done_mask]=0
            targets=batch['reward']+next_q*(self.discount**self.n_steps)
        self.learn.next_q=next_q
        self.learn.targets=targets
        self.learn.yb=(targets.reshape(-1),)
        # Get the current model output
        self.learn.action_v=self.learn.model.model(batch['state'])
        self.learn.actual_actions=batch['action']
        # Pick, per row, the Q-value of the action that was actually taken.
        self.learn.pred=self.learn.action_v.gather(1,batch['action']).reshape(-1)

    def before_backward(self):
        "Restore the cloned batch (as a 1-tuple) onto `learn.xb`."
        self.learn.xb=self._xb

    def after_batch(self):
        "Sync the target model every `target_sync` batches (including batch 0), then count the batch."
        if self.n_batch%self.target_sync==0:
            # Load from the same inner module the target was copied from so the
            # state-dict keys are guaranteed to line up.
            self.target_model.load_state_dict(self.learn.model.model.state_dict())
        self.n_batch+=1

In [9]:
# Q-network and the agent that wraps it (arg-max action selection plus epsilon-random exploration).
dqn = DQN(4, 2)
agent = Agent(
    dqn,
    cbs=[
        ArgMaxFeed,
        DiscreteEpsilonRandomSelect(min_epsilon=0.02, max_steps=1000),
    ],
)

# Experience source: the agent stepping through CartPole-v1.
source = Source(
    cbs=[
        GymLoop('CartPole-v1', agent, steps_count=1, steps_delta=1),
        FirstLast,
    ]
)
dls = SourceDataBlock().dataloaders([source], n=1000, bs=1, num_workers=0)

# Learner: replay buffer feeding the target-model trainer, MSE loss between pred and target.
learn = Learner(
    dls,
    agent,
    loss_func=MSELoss(),
    cbs=[
        ExperienceReplay(bs=32, max_sz=100000, warmup_sz=32),
        DQNTargetTrainer(n_steps=1),
    ],
    metrics=[Reward, Epsilon, NEpisodes],
)

Could not do one pass in your dataloader, there is something wrong in it


In [11]:
# Toggle between the long run (60 epochs) and a quick 3-epoch run.
slow = True
learn.fit(60 if slow else 3, lr=1e-4, wd=0)

epoch,train_loss,train_reward,train_epsilon,train_n_episodes,valid_loss,valid_reward,valid_epsilon,valid_n_episodes,time
0,32.967072,148.78,0.02,579,00:38,,,,
1,33.991703,142.19,0.02,588,00:31,,,,
2,36.916882,144.02,0.02,594,00:29,,,,
3,49.794846,143.55,0.02,601,00:28,,,,
4,35.804199,148.16,0.02,607,00:28,,,,
5,49.903011,144.23,0.02,615,00:29,,,,
6,38.329674,142.64,0.02,620,00:30,,,,
7,36.479065,141.26,0.02,628,00:28,,,,
8,51.793835,141.73,0.02,635,00:29,,,,
9,25.748798,137.22,0.02,644,00:30,,,,


In [12]:
# hide
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    # Star imports are kept at cell scope and in this order; later ones may shadow earlier names.
    from nbdev.export import *
    from nbdev.export2html import *
    from nbdev.cli import make_readme
    # Regenerate README.md from index.ipynb.
    make_readme()
    # Export the notebooks to library modules (prints one "Converted ..." line per notebook).
    notebook2script()
    # Render notebook(s) to HTML docs.
    notebook2html()

converting /home/fastrl_user/fastrl/nbs/index.ipynb to README.md
Converted 00_core.ipynb.
Converted 00_nbdev_extension.ipynb.
Converted 03_callback.core.ipynb.
Converted 04_agent.ipynb.
Converted 05_data.test_async.ipynb.
Converted 05a_data.block.ipynb.
Converted 05b_data.block_simple.ipynb.
Converted 05c_data.gym.ipynb.
Converted 10a_agents.dqn.core.ipynb.
Converted 10b_agents.dqn.targets.ipynb.
Converted 10c_agents.dqn.double.ipynb.
Converted 10d_agents.dqn.dueling.ipynb.
Converted 10e_agents.dqn.categorical.ipynb.
Converted 11a_agents.policy_gradient.ppo.ipynb.
Converted 20_test_utils.ipynb.
Converted index.ipynb.
Converted nbdev_template.ipynb.
converting: /home/fastrl_user/fastrl/nbs/10b_agents.dqn.targets.ipynb
