In [13]:
from fast_rl.core.basic_train import AgentLearner
from fast_rl.agents.dqn import FixedTargetDQN, FixedTargetDQNCallback
from fast_rl.core.train import AgentInterpretation, GroupAgentInterpretation
from fast_rl.core.data_block import MDPDataBunch
from fast_rl.core.agent_core import ExperienceReplay, GreedyEpsilon
from fastai.basic_data import DatasetType
from fast_rl.core.metrics import *
from fastai.gen_doc.nbdoc import *

In [14]:
# Render the generated API documentation for the FixedTargetDQN constructor.
show_doc(FixedTargetDQN.__init__)

<h4 id="FixedTargetDQN.__init__" class="doc_header"><code>__init__</code><a class="source_link" data-toggle="collapse" data-target="#FixedTargetDQN-__init__-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>__init__</code>(**`data`**:`MDPDataBunch`, **`memory`**=***`None`***, **`tau`**=***`0.01`***, **`copy_over_frequency`**=***`3`***, **\*\*`kwargs`**)

<div class="collapse" id="FixedTargetDQN-__init__-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#FixedTargetDQN-__init__-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>__init__</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Trains an Agent using the Q Learning method with 2 neural nets. Notes:
    Unlike the base DQN, this is a true reflection of ref [1]. We use 2 models instead of one to allow for
    training the action model more stably.

Args:
    data: Used for size input / output information.

References:
    [1] Mnih, Volodymyr, et al. "Playing atari with deep reinforcement learning."
    arXiv preprint arXiv:1312.5602 (2013). 

In [15]:
# Render the generated API documentation for FixedTargetDQN.target_copy_over.
show_doc(FixedTargetDQN.target_copy_over)

<h4 id="FixedTargetDQN.target_copy_over" class="doc_header"><code>target_copy_over</code><a class="source_link" data-toggle="collapse" data-target="#FixedTargetDQN-target_copy_over-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>target_copy_over</code>()

<div class="collapse" id="FixedTargetDQN-target_copy_over-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#FixedTargetDQN-target_copy_over-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>target_copy_over</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Updates the target network from calls in the FixedTargetDQNCallback callback.  

In [16]:
# Render the generated API documentation for the FixedTargetDQNCallback constructor.
show_doc(FixedTargetDQNCallback.__init__)

<h4 id="FixedTargetDQNCallback.__init__" class="doc_header"><code>__init__</code><a class="source_link" data-toggle="collapse" data-target="#FixedTargetDQNCallback-__init__-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>__init__</code>(**`learn`**, **`copy_over_frequency`**=***`3`***)

<div class="collapse" id="FixedTargetDQNCallback-__init__-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#FixedTargetDQNCallback-__init__-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>__init__</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Handles updating the target model in a fixed target DQN. Args:
    learn: Basic Learner.
    copy_over_frequency: For every N iterations we want to update the target model. 

In [None]:
# Train a FixedTargetDQN agent on CartPole-v1 using an ExperienceReplay memory and the
# RMSprop optimizer, then plot the recorded losses.
# NOTE(review): `torch` is not imported explicitly in this notebook; presumably it is
# re-exported by one of the star imports in the first cell — confirm.
data = MDPDataBunch.from_env('CartPole-v1', render='rgb_array', bs=32, add_valid=False)
try:
    model = FixedTargetDQN(data, memory=ExperienceReplay(memory_size=100000, reduce_ram=True),
                           optimizer=torch.optim.RMSprop, copy_over_frequency=3)
    learn = AgentLearner(data, model, callback_fns=[RewardMetric, EpsilonMetric])
    learn.fit(450)
finally:
    # Always close the data bunch, even when training raises or the kernel is
    # interrupted mid-fit, so it is never left open.
    data.close()
learn.recorder.plot_losses()

epoch,train_loss,valid_loss,train_reward,valid_reward,epsilon,time
0,0.974766,#na#,40.0,0,0.965576,00:01
1,0.899604,#na#,9.0,0,0.95782,00:00
2,0.777173,#na#,17.0,0,0.943361,00:00
3,0.690075,#na#,17.0,0,0.929145,00:00
4,0.48327,#na#,46.0,0,0.891868,00:02
5,0.420364,#na#,13.0,0,0.88164,00:00
6,0.316571,#na#,28.0,0,0.860058,00:01
7,0.177591,#na#,60.0,0,0.815796,00:03
8,0.157301,#na#,17.0,0,0.80373,00:00
9,0.139129,#na#,16.0,0,0.79256,00:00


In [None]:
# Wrap the fitted learner in an AgentInterpretation, selecting the training dataset.
interp = AgentInterpretation(learn, ds_type=DatasetType.Train)

In [None]:
# Plot the rewards of the single training run above (cumulative, per episode).
# NOTE(review): group_name='er_rms' presumably labels this run as ExperienceReplay + RMSprop — confirm.
interp.plot_rewards(cumulative=True, per_episode=True, group_name='er_rms')

We can also run this pipeline several times and group the results to see how the model actually performs across runs.

In [None]:
# Collect several independent training runs into one grouped interpretation.
# The group is seeded with `interp`, the single-run interpretation built earlier.
group_interp = GroupAgentInterpretation()
group_interp.add_interpretation(interp)
for _ in range(4):
    # Loop-local names are used on purpose: rebinding the notebook-level `data`, `model`,
    # `learn` and `interp` here would make this cell non-idempotent (a re-run would seed
    # the group with the last loop run instead of the original interpretation).
    run_data = MDPDataBunch.from_env('CartPole-v1', render='rgb_array', bs=128)
    try:
        run_model = FixedTargetDQN(run_data, memory=ExperienceReplay(memory_size=100000, reduce_ram=True))
        run_learn = AgentLearner(run_data, run_model)
        run_learn.fit(3)
        run_interp = AgentInterpretation(run_learn)
        # no_show=True is passed so each run is only recorded under the 'run' group here;
        # the combined plot is drawn from `group_interp` in a later cell.
        run_interp.plot_rewards(cumulative=True, per_episode=True, group_name='run', no_show=True)
        group_interp.add_interpretation(run_interp)
    finally:
        # Close this run's data bunch even if training or interpretation fails.
        run_data.close()

In [None]:
# Plot the per-episode reward bounds over the interpretations collected in group_interp.
group_interp.plot_reward_bounds(per_episode=True)

In [None]:
# Display the `analysis` attribute of every group held by the grouped interpretation.
[group.analysis for group in group_interp.groups]

In [None]:
# Tag the grouped results with the meta label 'run1' and pickle them under data/fixed_target_dqn.
# NOTE(review): the name saved here is 'fixed_target_dqn', while the loading cell below reads
# 'fixedtargetdqn_er' and 'fixedtargetdqn_per' from the same directory — confirm those files
# are produced elsewhere.
group_interp.append_meta('run1').to_pickle('data/fixed_target_dqn', 'fixed_target_dqn')

In [None]:
# Load two previously pickled grouped interpretations and merge the second into the first.
# NOTE(review): the '_er' / '_per' suffixes presumably distinguish two replay-memory variants — confirm.
# SECURITY: unpickling can execute arbitrary code; only load pickle files from a trusted source.
from_pickle_interp = GroupAgentInterpretation.from_pickle('data/fixed_target_dqn', 
                                                          'fixedtargetdqn_er')
from_pickle_interp.add_interpretation(GroupAgentInterpretation.from_pickle('data/fixed_target_dqn', 
                                                          'fixedtargetdqn_per'))

In [None]:
# Plot the per-episode reward bounds of the loaded groups.
# NOTE(review): smooth_groups=25 presumably sets a smoothing window for each group's curve — confirm.
from_pickle_interp.plot_reward_bounds(per_episode=True, smooth_groups=25)