In [196]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import tensorflow as tf
import tflearn
import numpy as np
from sklearn.model_selection import train_test_split

import drqn
import dataset_utils as d_utils
import utils

%load_ext autoreload
%autoreload 2
%reload_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Preprocessing Data for DRQN
We take the data from the data generator and convert it into traces of (s, a, r, sp) tuples.

Each trajectory corresponds to a trace.

If a trajectory has length n, then its trace has length n-1, since each tuple also needs the next state sp.

In [183]:
# Load the synthetic trajectories produced by the data generator.
# NOTE(review): the file is a .pickle, so load_data presumably unpickles it --
# only point this at trusted files.
data = d_utils.load_data(
    filename="../synthetic_data/test-n10000-l3-random.pickle",
)

In [84]:
# Convert the loaded trajectories into traces of (s, a, r, sp) tuples,
# requesting the "sparse" reward model from the preprocessing helper.
dqn_data = d_utils.preprocess_data_for_dqn(
    data,
    reward_model="sparse",
)

In [85]:
# A single trace: the list of (s, a, r, sp) tuples for one trajectory.
print(dqn_data[0])

[[array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]), array([ 1.,  0.,  0.,  0.,  0.]), 0.0, array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])], [array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  1.]), 0.20000000000000001, array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])]]


In [86]:
# Unpack the first (s, a, r, sp) tuple of the first trace and show each
# component on its own line.
s, a, r, sp = dqn_data[0][0]
print(s, a, r, sp, sep="\n")

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
[ 1.  0.  0.  0.  0.]
0.0
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]


In [187]:
# Unpack the last (s, a, r, sp) tuple of the first trace and show each
# component on its own line.
s, a, r, sp = dqn_data[0][-1]
print(s, a, r, sp, sep="\n")

[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
[ 0.  0.  0.  0.  1.]
0.2
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]


In [88]:
# Hold out 20% of the traces for testing. A fixed random_state makes the split
# reproducible across kernel restarts; without it every run trains and
# evaluates on a different partition.
dqn_data_train, dqn_data_test = train_test_split(
    dqn_data,
    test_size=0.2,
    random_state=42,
)

### Creating a DRQN model and training it

In [197]:
# Identifier used to namespace this model's log and checkpoint folders.
model_id = "test_model_drqn"

# Tensorboard summaries: target directory and logging interval.
# NOTE(review): interval units are presumably training steps -- confirm in drqn.
tensorboard_dir = '../tensorboard_logs/' + model_id + '/'
summary_interval = 100

# Checkpoints: target directory, path prefix and saving interval.
checkpoint_dir = '../checkpoints/' + model_id + '/'
checkpoint_path = checkpoint_dir + '_/'
checkpoint_interval = 200

# Run the path helper on both output directories before training
# (judging by its name it creates them when missing -- confirm in utils).
for output_dir in (tensorboard_dir, checkpoint_dir):
    utils.check_if_path_exists_or_create(output_dir)

In [198]:
# Instantiate the DRQN model under the configured model id.
# NOTE(review): timesteps=2 presumably matches the trace length of 2 seen in
# the sample trace printed earlier -- confirm against drqn.DRQNModel.
drqn_model = drqn.DRQNModel(
    model_id=model_id,
    timesteps=2,
)

Loading DQN RNN model...


In [None]:
# Create the variable-initialisation op and a saver that retains at most the
# three most recent checkpoints.
init = tf.global_variables_initializer()
saver = tf.train.Saver(max_to_keep=3)

with tf.Session() as session:
    # Initialise all graph variables before any training step runs.
    session.run(init)

    # Fill a fresh experience buffer with the training traces directly and
    # keep its recorded size in sync with the contents.
    train_buffer = drqn.ExperienceBuffer()
    train_buffer.buffer = dqn_data_train
    train_buffer.buffer_sz = len(train_buffer.buffer)

    # NOTE(review): checkpoint_dir is passed as ckpt_path while the separately
    # defined checkpoint_path is not used here -- confirm which one is intended.
    drqn.train(
        drqn_model,
        session,
        saver,
        train_buffer,
        load_checkpoint=False,
        ckpt_path=checkpoint_dir,
    )

INFO:tensorflow:Summary name Qmax Value is illegal; using Qmax_Value instead.
Training step: 0
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'list' object has no attribute 'name'
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'list' object has no attribute 'name'
Training step: 1
Training step: 2
Training step: 3
Training step: 4
Training step: 5
Training step: 6
Training step: 7
Training step: 8
Training step: 9
Training step: 10
Training step: 11
Training step: 12
Training step: 13
Training step: 14
Training step: 15
Training step: 16
Training step: 17
Training step: 18
Training step: 19
Training step: 20
Training step: 21
Training step: 22
Training step: 23
Training step: 24
Training step: 25
Training step: 26
Training step: 27
Training step: 28
Training step: 29
Training step: 30
Training step: 31
Training step: 32
Training step: 33
Training step: 34
Training step: 35
Training step: 36
Training step:

In [134]:
# Draw a batch of one trace from the model's own experience buffer and show it.
print(drqn_model.experience_buffer.sample(batch_sz=1))

[[[array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.])
   array([ 0.,  0.,  0.,  1.,  0.]) 0.0
   array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.])]
  [array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.])
   array([ 0.,  0.,  0.,  0.,  1.]) 0.20000000000000001
   array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])]]]


In [135]:
# Same as the random sample, but using the buffer's in-order sampler.
print(drqn_model.experience_buffer.sample_in_order(batch_sz=1))

[[[array([ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.])
   array([ 0.,  0.,  0.,  0.,  1.]) 0.0
   array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])]
  [array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])
   array([ 0.,  0.,  0.,  1.,  0.]) 0.20000000000000001
   array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.])]]]


In [160]:
# Take a batch of 16 traces via the in-order sampler, then slice out the state
# (index 0) and reward (index 2) entries of every (s, a, r, sp) tuple.
# train_batch is indexed as [trace, step, tuple field].
train_batch = drqn_model.experience_buffer.sample_in_order(batch_sz=16)
s_batch, r_batch = train_batch[:, :, 0], train_batch[:, :, 2]

In [147]:
# State vector of the first step in the first trace of the batch.
print(np.array(s_batch[0, 0]))

[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0.]


In [161]:
# Convert the reward slice with stack_batch and report the resulting shape.
# NOTE(review): stack_batch is not defined or imported in any visible cell --
# this only runs on leftover kernel state; confirm where it lives (drqn? utils?)
# and import or qualify it so the notebook survives Restart & Run All.
r_batch_new = stack_batch(r_batch)
print (r_batch_new.shape)

(16, 2)
