In [172]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import tensorflow as tf
import tflearn
import numpy as np
from sklearn.model_selection import train_test_split

import drqn
import dataset_utils as d_utils

%load_ext autoreload
%autoreload 2
%reload_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Preprocessing Data for DRQN
We take the data from data generator and save them into traces of (s,a,r,sp) tuples.

Each trajectory corresponds to a trace.

If trajectory has length n, then trace will have length n-1. (since we need the next state sp)

In [183]:
data = d_utils.load_data(filename="../synthetic_data/test-n10000-l3-random.pickle")

In [84]:
dqn_data = d_utils.preprocess_data_for_dqn(data, reward_model="sparse")

In [85]:
# Single Trace
print (dqn_data[0])

[[array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]), array([ 1.,  0.,  0.,  0.,  0.]), 0.0, array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])], [array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  1.]), 0.20000000000000001, array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])]]


In [86]:
# First tuple in a trace
s,a,r,sp = dqn_data[0][0]
print (s)
print (a)
print (r)
print (sp)

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
[ 1.  0.  0.  0.  0.]
0.0
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]


In [187]:
# Last tuple
s,a,r,sp = dqn_data[0][-1]
print (s)
print (a)
print (r)
print (sp)

[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
[ 0.  0.  0.  0.  1.]
0.2
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]


In [88]:
dqn_data_train, dqn_data_test = train_test_split(dqn_data, test_size=0.2)

### Creating a DRQN model and training it

In [186]:
model_id = "test_model_drqn"

# Directory for storing tensorboard summaries
tensorboard_dir = '../tensorboard_logs/' + model_id + '/'
summary_interval = 100
checkpoint_dir = '../checkpoints/' + model_id + '/'
checkpoint_path = checkpoint_dir + '_/'
checkpoint_interval = 200

In [184]:
drqn_model = drqn.DRQNModel(model_id=model_id, timesteps=2)

Loading DQN RNN model...


In [191]:
init = tf.global_variables_initializer()
saver = tf.train.Saver(max_to_keep=3)
writer_summary = tf.summary.FileWriter
merge_all_summaries = tf.summary.merge_all
histogram_summary = tf.summary.histogram
scalar_summary = tf.summary.scalar
with tf.Session() as session:
    session.run(init)
    train_buffer = drqn.ExperienceBuffer()
    train_buffer.buffer = dqn_train_data
    train_buffer.buffer_sz = len(train_buffer.buffer)
    drqn.train(drqn_model, session, saver,train_buffer,load_checkpoint=True, ckpt_path=checkpoint_path)

[autoreload of drqn failed: Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/IPython/extensions/autoreload.py", line 247, in check
    superreload(m, reload, self.old_objects)
  File "drqn.py", line 82
    def build_drqn(n_timesteps, n_inputdim, n_hidden, n_actions):
                                                                ^
IndentationError: expected an indented block
]
[autoreload of drqn failed: Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/IPython/extensions/autoreload.py", line 247, in check
    superreload(m, reload, self.old_objects)
  File "drqn.py", line 82
    def build_drqn(n_timesteps, n_inputdim, n_hidden, n_actions):
                                                                ^
IndentationError: expected an indented block
]
[autoreload of drqn failed: Traceback (most recent call last):
  File "/Library/Frameworks/Pyt

NameError: name 'dqn_train_data' is not defined

In [134]:
print (drqn_model.experience_buffer.sample(batch_sz=1))

[[[array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.])
   array([ 0.,  0.,  0.,  1.,  0.]) 0.0
   array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.])]
  [array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.])
   array([ 0.,  0.,  0.,  0.,  1.]) 0.20000000000000001
   array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])]]]


In [135]:
print (drqn_model.experience_buffer.sample_in_order(batch_sz=1))

[[[array([ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.])
   array([ 0.,  0.,  0.,  0.,  1.]) 0.0
   array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])]
  [array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])
   array([ 0.,  0.,  0.,  1.,  0.]) 0.20000000000000001
   array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.])]]]


In [160]:
train_batch = drqn_model.experience_buffer.sample_in_order(batch_sz=16)
# train_batch is 
s_batch = train_batch[:,:,0]
r_batch = train_batch[:,:,2]

In [147]:
print (np.array(s_batch[0,0]))

[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0.]


In [161]:
r_batch_new = stack_batch(r_batch)
print (r_batch_new.shape)

(16, 2)
