In [6]:
from fast_rl.core.basic_train import AgentLearner
from fast_rl.agents.dqn import DQN, BaseDQNCallback
from fast_rl.core.train import AgentInterpretation, GroupAgentInterpretation
from fast_rl.core.data_block import MDPDataBunch
from fast_rl.core.agent_core import ExperienceReplay, GreedyEpsilon
from fastai.gen_doc.nbdoc import *

In [7]:
# Build the data bunch for the CartPole-v1 environment. bs=128 is the batch size,
# which the model also uses during optimization.
data = MDPDataBunch.from_env('CartPole-v1', render='rgb_array', bs=128)
# To skip the validation phase, create the data bunch with add_valid=False and point the
# interpretation objects at the training set instead:
# data = MDPDataBunch.from_env('CartPole-v1', render='rgb_array', add_valid=False, bs=128)
# AgentInterpretation(learn=learn, ds_type=DatasetType.Train)
# NOTE(review): `DatasetType` is not among the explicit imports at the top of this notebook and
# `learn` is only created in a later cell -- import/define both before uncommenting the lines above.

In [8]:
# Render the signature and docstring of DQN.__init__ inline.
show_doc(DQN.__init__)

<h4 id="DQN.__init__" class="doc_header"><code>__init__</code><a class="source_link" data-toggle="collapse" data-target="#DQN-__init__-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>__init__</code>(**`data`**:`MDPDataBunch`, **`memory`**=***`None`***, **`lr`**=***`0.01`***, **`discount`**=***`0.95`***, **`grad_clip`**=***`5`***, **`max_episodes`**=***`None`***, **`exploration_strategy`**=***`None`***, **`use_embeddings`**=***`False`***, **`layers`**=***`None`***)

<div class="collapse" id="DQN-__init__-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#DQN-__init__-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>__init__</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Trains an Agent using the Q Learning method on a neural net.

Notes:
    This is not a true implementation of [1]. A true implementation uses a fixed target network.

References:
    [1] Mnih, Volodymyr, et al. "Playing atari with deep reinforcement learning."
    arXiv preprint arXiv:1312.5602 (2013).

Args:
    data: Used for size input / output information. 

In [9]:
# Render the signature and docstring of BaseDQNCallback.__init__ inline.
show_doc(BaseDQNCallback.__init__)

<h4 id="BaseDQNCallback.__init__" class="doc_header"><code>__init__</code><a class="source_link" data-toggle="collapse" data-target="#BaseDQNCallback-__init__-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>__init__</code>(**`learn`**, **`max_episodes`**=***`None`***)

<div class="collapse" id="BaseDQNCallback-__init__-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#BaseDQNCallback-__init__-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>__init__</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Handles basic DQN end of step model optimization.  

In [10]:
# Render the signature and docstring of BaseDQNCallback.on_loss_begin inline.
show_doc(BaseDQNCallback.on_loss_begin)

<h4 id="BaseDQNCallback.on_loss_begin" class="doc_header"><code>on_loss_begin</code><a class="source_link" data-toggle="collapse" data-target="#BaseDQNCallback-on_loss_begin-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>on_loss_begin</code>(**\*\*`kwargs`**:`Any`)

<div class="collapse" id="BaseDQNCallback-on_loss_begin-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#BaseDQNCallback-on_loss_begin-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>on_loss_begin</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Performs memory updates, exploration updates, and model optimization.  

The batch size is defined on the data object, because a `DataBunch` already requires a
batch size as input. The model uses that same batch size during optimization.

In [11]:
# Replay memory handed to the agent; its capacity and RAM-reduction flag are set here.
replay_memory = ExperienceReplay(memory_size=100000, reduce_ram=True)
# DQN agent model; `data` supplies the input / output size information.
model = DQN(data, memory=replay_memory)

The `AgentLearner` wraps the data and the model; it is the object the fastai API uses to train the model.

In [None]:
# Wrap the data bunch and the DQN model in the learner object that drives training.
learn = AgentLearner(data, model)
try:
    # Train for 450 epochs.
    learn.fit(450)
finally:
    # Close the data bunch even when training raises or is interrupted from the
    # keyboard, so the environment behind it is never left open.
    data.close()
# Plot the losses recorded during training.
learn.recorder.plot_losses()

epoch,train_loss,valid_loss,time
0,0.973459,0.907868,00:09
1,0.934833,0.82761,00:01
2,0.859068,0.713839,00:02
3,0.798632,0.661734,00:01
4,0.656191,0.503011,00:03
5,0.518848,0.487771,00:02
6,0.47338,0.379639,00:01
7,0.387111,0.253746,00:02
8,0.344052,0.227344,00:01
9,0.251088,0.29755,00:07


In [None]:
# Wrap the trained learner in an interpretation object, used for the reward plots.
interp = AgentInterpretation(learn)

In [None]:
# Plot the rewards of this run.
# NOTE(review): presumably `cumulative` / `per_episode` select cumulative reward per episode
# and `group_name` labels the run for later grouping -- confirm against fast_rl.
interp.plot_rewards(cumulative=True, per_episode=True, group_name='run')

We can also repeat this whole pipeline over several independent runs and group the results, to see how the model actually performs across runs rather than in a single one.

In [None]:
# Collect the interpretation of the run above plus four fresh runs into one group,
# so reward statistics can be compared across runs.
group_interp = GroupAgentInterpretation()
group_interp.add_interpretation(interp)
for i in range(4):
    # Each run gets its own environment-backed data bunch, model and learner.
    data = MDPDataBunch.from_env('CartPole-v1', render='rgb_array', bs=128)
    try:
        model = DQN(data, memory=ExperienceReplay(memory_size=100000, reduce_ram=True))
        learn = AgentLearner(data, model)
        learn.fit(450)
        interp = AgentInterpretation(learn)
        # no_show=True: only the interpretation is needed here; it is added to the
        # group right after this call.
        interp.plot_rewards(cumulative=True, per_episode=True, group_name='run', no_show=True)
        group_interp.add_interpretation(interp)
    finally:
        # Always close this run's data bunch, even if training fails or is
        # interrupted, so no environment is leaked between iterations.
        data.close()

In [None]:
# Plot the reward bounds across all runs held by the grouped interpretation, per episode.
group_interp.plot_reward_bounds(per_episode=True)

In [None]:
# Display the `analysis` attribute of every group held by the grouped interpretation.
[g.analysis for g in group_interp.groups]

In [None]:
# Persist the grouped interpretation to disk.
# NOTE(review): the arguments look like (directory, file name) -- confirm, and confirm
# whether the 'data/dqn' directory must already exist.
group_interp.to_pickle('data/dqn', 'dqn')

In [None]:
# Reload a grouped interpretation using the same ('data/dqn', 'dqn') arguments it was saved with.
# NOTE(review): `from_pickle` is invoked on an existing instance; if it is a classmethod,
# `GroupAgentInterpretation.from_pickle(...)` would state the intent more clearly -- confirm.
from_pickle_interp = group_interp.from_pickle('data/dqn', 'dqn')

In [None]:
# Plot reward bounds from the reloaded object.
# NOTE(review): presumably this should match the plot produced from `group_interp` -- confirm.
from_pickle_interp.plot_reward_bounds(per_episode=True)