In [172]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import tensorflow as tf
import tflearn
import numpy as np
from sklearn.model_selection import train_test_split

import drqn
import dataset_utils as d_utils

%load_ext autoreload
%autoreload 2
%reload_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Preprocessing Data for DRQN
We take the data from the data generator and store it as traces of (s, a, r, sp) tuples.

Each trajectory corresponds to a trace.

If a trajectory has length n, its trace has length n-1, since every tuple also needs the next state sp.

In [9]:
# Load the synthetic dataset that the rest of the notebook preprocesses.
data = d_utils.load_data(
    filename="../synthetic_data/test-n10000-l3-random.pickle")

In [84]:
# Turn the loaded data into DQN traces using the "sparse" reward model.
dqn_data = d_utils.preprocess_data_for_dqn(
    data,
    reward_model="sparse")

In [85]:
# Show the first trace in full (a list of [s, a, r, sp] entries).
print(dqn_data[0])

[[array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]), array([ 1.,  0.,  0.,  0.,  0.]), 0.0, array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])], [array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  1.]), 0.20000000000000001, array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])]]


In [86]:
# Unpack the first tuple of the first trace and print each component on its own line.
s, a, r, sp = dqn_data[0][0]
print(s, a, r, sp, sep="\n")

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
[ 1.  0.  0.  0.  0.]
0.0
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]


In [87]:
# Unpack the second tuple of the first trace and print each component on its own line.
s, a, r, sp = dqn_data[0][1]
print(s, a, r, sp, sep="\n")

[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
[ 0.  0.  0.  0.  1.]
0.2
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]


In [88]:
# Hold out 20% of the traces for testing. A fixed random_state makes the split
# reproducible across kernel restarts (the original split changed on every run).
dqn_data_train, dqn_data_test = train_test_split(
    dqn_data, test_size=0.2, random_state=0)

### Testing individual functions in drqn.py

In [26]:
# Build the TensorFlow graph for the DRQN net
# (basically an RNN with real-valued outputs at every timestep).
inputs, q_values = drqn.build_drqn(
    n_timesteps=2,
    n_inputdim=10,
    n_hidden=10,
    n_actions=5)

In [29]:
# Build all graph ops for DRQN. Judging by the printed result, the returned dict
# holds the 'q_inputs', 'a' and 'y' placeholders, the 'q_values' tensor and the
# 'grad_update' op.
graph_ops = drqn.build_tf_graph_drqn(
    n_timesteps=2,
    n_inputdim=10,
    n_hidden=10,
    n_actions=5)

In [30]:
# Inspect the dictionary of graph ops.
print(graph_ops)

{'a': <tf.Tensor 'Placeholder_6:0' shape=(?, 2, 5) dtype=float32>, 'y': <tf.Tensor 'Placeholder_7:0' shape=(?, 2) dtype=float32>, 'q_inputs': <tf.Tensor 'Placeholder_5:0' shape=(?, 2, 10) dtype=float32>, 'q_values': <tf.Tensor 'stack_3:0' shape=(?, 2, 5) dtype=float32>, 'grad_update': <tf.Operation 'RMSProp' type=NoOp>}


Loading DQN RNN model...


In [182]:
# Instantiate the DRQN model (id "test_model_drqn", 2 timesteps).
drqn_model = drqn.DRQNModel(model_id="test_model_drqn", timesteps=2)
# The initializer op is created after the model is constructed -- presumably so
# that the model's variables are covered by it (TODO confirm against drqn.py).
init = tf.global_variables_initializer()
# The with-block closes the session automatically, even if training is interrupted.
with tf.Session() as session:
    session.run(init)
    # Train on the training split only; dqn_data_test is held out.
    drqn.train(session, dqn_data_train, drqn_model)

Loading DQN RNN model...


KeyboardInterrupt: 

In [134]:
# Draw a single-trace batch from the experience buffer via sample().
print(drqn_model.experience_buffer.sample(batch_sz=1))

[[[array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.])
   array([ 0.,  0.,  0.,  1.,  0.]) 0.0
   array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.])]
  [array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.])
   array([ 0.,  0.,  0.,  0.,  1.]) 0.20000000000000001
   array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])]]]


In [135]:
# Draw a single-trace batch from the experience buffer via sample_in_order().
print(drqn_model.experience_buffer.sample_in_order(batch_sz=1))

[[[array([ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.])
   array([ 0.,  0.,  0.,  0.,  1.]) 0.0
   array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])]
  [array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])
   array([ 0.,  0.,  0.,  1.,  0.]) 0.20000000000000001
   array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.])]]]


In [160]:
# Pull a batch of 16 traces from the experience buffer.
train_batch = drqn_model.experience_buffer.sample_in_order(batch_sz=16)
# train_batch is indexed as [trace, timestep, field]; judging by the printed
# samples the fields are ordered (s, a, r, sp), so field 0 is s and field 2 is r.
s_batch, r_batch = train_batch[:, :, 0], train_batch[:, :, 2]

In [147]:
# Show the state at the first timestep of the first trace in the batch.
print(np.array(s_batch[0, 0]))

[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0.]


In [161]:
# NOTE(review): stack_batch is not defined or imported in any cell visible in
# this notebook, so this cell raises NameError on a fresh kernel. Confirm where
# it lives (possibly drqn -- verify) and import or qualify it explicitly.
r_batch_new = stack_batch(r_batch)
# Check the shape of the stacked reward batch.
print(r_batch_new.shape)

(16, 2)
